library("grid")
library("ggplot2")
library("gplots")
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
library("ggcorrplot")
library("gridExtra")
library("viridis")
## Loading required package: viridisLite
library("reshape2")
library("gridExtra")
library("RColorBrewer")
library("scales")
## 
## Attaching package: 'scales'
## The following object is masked from 'package:viridis':
## 
##     viridis_pal
library("stringr")
library("rstatix")
## 
## Attaching package: 'rstatix'
## The following object is masked from 'package:ggcorrplot':
## 
##     cor_pmat
## The following object is masked from 'package:stats':
## 
##     filter
library("dplyr")
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("tidyr")
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
## 
##     smiths
source('./COVIDvaccines_PlottingFunctions.R')
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## The following object is masked from 'package:rstatix':
## 
##     select
# Print the R version, platform, locale and attached/loaded package versions
# used for this run (output follows).
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18362)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] e1071_1.7-3        pheatmap_1.0.12    ggrepel_0.8.2      MASS_7.3-51.6     
##  [5] tidyr_1.1.1        dplyr_1.0.2        rstatix_0.6.0      stringr_1.4.0     
##  [9] scales_1.1.1       RColorBrewer_1.1-2 reshape2_1.4.4     viridis_0.5.1     
## [13] viridisLite_0.3.0  gridExtra_2.3      ggcorrplot_0.1.3   gplots_3.0.4      
## [17] ggplot2_3.3.2     
## 
## loaded via a namespace (and not attached):
##  [1] gtools_3.8.2       tidyselect_1.1.0   xfun_0.15          purrr_0.3.4       
##  [5] haven_2.3.1        carData_3.0-4      colorspace_1.4-1   vctrs_0.3.2       
##  [9] generics_0.0.2     htmltools_0.5.0    yaml_2.2.1         rlang_0.4.7       
## [13] pillar_1.4.6       foreign_0.8-80     glue_1.4.1         withr_2.2.0       
## [17] readxl_1.3.1       lifecycle_0.2.0    plyr_1.8.6         cellranger_1.1.0  
## [21] munsell_0.5.0      gtable_0.3.0       zip_2.1.0          caTools_1.18.0    
## [25] evaluate_0.14      knitr_1.29         rio_0.5.16         forcats_0.5.0     
## [29] class_7.3-17       curl_4.3           highr_0.8          broom_0.7.0       
## [33] Rcpp_1.0.5         KernSmooth_2.23-17 backports_1.1.9    gdata_2.18.0      
## [37] abind_1.4-5        hms_0.5.3          digest_0.6.25      stringi_1.4.6     
## [41] openxlsx_4.1.5     tools_4.0.2        bitops_1.0-6       magrittr_1.5      
## [45] tibble_3.0.3       crayon_1.3.4       car_3.0-9          pkgconfig_2.0.3   
## [49] ellipsis_0.3.1     data.table_1.13.0  rmarkdown_2.7      R6_2.4.1          
## [53] compiler_4.0.2
# Default figure dimensions for every knitr chunk in this document.
knitr::opts_chunk$set(
  fig.width = 12,
  fig.height = 8
)

# Load the serialized analysis objects from the working directory.
mergedData <- readRDS("mergedData.Rds")
demog.melt <- readRDS("demog.melt.Rds")
# colors:   COVID-exp B5B2F1 (purple)             COVID-naive FFDFB1 (orange)

# ---- First x subjects and no longitudinal ----

# Candidate subjects are CV-001 .. CV-043; the IDs listed here are excluded.
excludedIDs <- c(
  "CV-013",   # developed COVID shortly after 1st vaccination so unclear status
  "CV-031",   # received Moderna
  "CV-038",   # fully vaccinated transplant patient at enrollment
  "CV-040",   # enrolled late so don't have recent baseline or post 1st dose
  "CV-041",   # incomplete timecourse, vax date 2 unknown and lost to followup?
  "CV-042"    # received Moderna
)

# setdiff() replaces keepList[-which(...)]: negative indexing with an empty
# which() result silently returns an empty vector instead of the full list.
keepList <- setdiff(paste0("CV-", sprintf("%03d", 1:43)), excludedIDs)

# Restrict the data set to the retained subjects.
mergedData <- mergedData[mergedData$Record.ID %in% keepList, ]

# Summary statistics per prior-infection status and time category. mergedData
# has just been restricted to keepList, so it is not filtered a second time.
temp <- mergedData %>%
  group_by(Prior.COVID.infection., timeCategory) %>%
  get_summary_stats(type = "common")
## Warning: Problem with `mutate()` input `data`.
## i Problem with `mutate()` input `ci`.
## i NaNs produced
## i Input `ci` is `abs(stats::qt(alpha/2, .data$n - 1) * .data$se)`.
## i Input `data` is `map(.data$data, .f, ...)`.
## Warning: Problem with `mutate()` input `ci`.
## i NaNs produced
## i Input `ci` is `abs(stats::qt(alpha/2, .data$n - 1) * .data$se)`.
## Warning: Problem with `mutate()` input `data`.
## i NaNs produced
## i Input `data` is `map(.data$data, .f, ...)`.
## Warning in stats::qt(alpha/2, .data$n - 1): NaNs produced
## Warning: Problem with `mutate()` input `data`.
## i Problem with `mutate()` input `ci`.
## i NaNs produced
## i Input `ci` is `abs(stats::qt(alpha/2, .data$n - 1) * .data$se)`.
## i Input `data` is `map(.data$data, .f, ...)`.
## Warning: Problem with `mutate()` input `ci`.
## i NaNs produced
## i Input `ci` is `abs(stats::qt(alpha/2, .data$n - 1) * .data$se)`.
## Warning: Problem with `mutate()` input `data`.
## i NaNs produced
## i Input `data` is `map(.data$data, .f, ...)`.
## Warning in stats::qt(alpha/2, .data$n - 1): NaNs produced
## Warning: Problem with `mutate()` input `data`.
## i Problem with `mutate()` input `ci`.
## i NaNs produced
## i Input `ci` is `abs(stats::qt(alpha/2, .data$n - 1) * .data$se)`.
## i Input `data` is `map(.data$data, .f, ...)`.
## Warning: Problem with `mutate()` input `ci`.
## i NaNs produced
## i Input `ci` is `abs(stats::qt(alpha/2, .data$n - 1) * .data$se)`.
## Warning: Problem with `mutate()` input `data`.
## i NaNs produced
## i Input `data` is `map(.data$data, .f, ...)`.
## Warning in stats::qt(alpha/2, .data$n - 1): NaNs produced
## Warning: Problem with `mutate()` input `data`.
## i Problem with `mutate()` input `ci`.
## i NaNs produced
## i Input `ci` is `abs(stats::qt(alpha/2, .data$n - 1) * .data$se)`.
## i Input `data` is `map(.data$data, .f, ...)`.
## Warning: Problem with `mutate()` input `ci`.
## i NaNs produced
## i Input `ci` is `abs(stats::qt(alpha/2, .data$n - 1) * .data$se)`.
## Warning: Problem with `mutate()` input `data`.
## i NaNs produced
## i Input `data` is `map(.data$data, .f, ...)`.
## Warning in stats::qt(alpha/2, .data$n - 1): NaNs produced
## Warning: Problem with `mutate()` input `data`.
## i Problem with `mutate()` input `ci`.
## i NaNs produced
## i Input `ci` is `abs(stats::qt(alpha/2, .data$n - 1) * .data$se)`.
## i Input `data` is `map(.data$data, .f, ...)`.
## Warning: Problem with `mutate()` input `ci`.
## i NaNs produced
## i Input `ci` is `abs(stats::qt(alpha/2, .data$n - 1) * .data$se)`.
## Warning: Problem with `mutate()` input `data`.
## i NaNs produced
## i Input `data` is `map(.data$data, .f, ...)`.
## Warning in stats::qt(alpha/2, .data$n - 1): NaNs produced
## Warning: Problem with `mutate()` input `data`.
## i Problem with `mutate()` input `ci`.
## i NaNs produced
## i Input `ci` is `abs(stats::qt(alpha/2, .data$n - 1) * .data$se)`.
## i Input `data` is `map(.data$data, .f, ...)`.
## Warning: Problem with `mutate()` input `ci`.
## i NaNs produced
## i Input `ci` is `abs(stats::qt(alpha/2, .data$n - 1) * .data$se)`.
## Warning: Problem with `mutate()` input `data`.
## i NaNs produced
## i Input `data` is `map(.data$data, .f, ...)`.
## Warning in stats::qt(alpha/2, .data$n - 1): NaNs produced
## Warning: Problem with `mutate()` input `data`.
## i Problem with `mutate()` input `ci`.
## i NaNs produced
## i Input `ci` is `abs(stats::qt(alpha/2, .data$n - 1) * .data$se)`.
## i Input `data` is `map(.data$data, .f, ...)`.
## Warning: Problem with `mutate()` input `ci`.
## i NaNs produced
## i Input `ci` is `abs(stats::qt(alpha/2, .data$n - 1) * .data$se)`.
## Warning: Problem with `mutate()` input `data`.
## i NaNs produced
## i Input `data` is `map(.data$data, .f, ...)`.
## Warning in stats::qt(alpha/2, .data$n - 1): NaNs produced
# write.csv(temp, file = "summaryStatistics.csv")

# Baseline samples only: cross-tabulate prior-infection status against sex and
# race, then summarise DPO.covid.
subsetData <- subset(mergedData, timeCategory == "Baseline")
table(subsetData$Prior.COVID.infection., subsetData$Sex)
##      
##       Female Male
##   No      10   11
##   Yes     10    5
table(subsetData$Prior.COVID.infection., subsetData$Race)
##      
##       Asian Black or African-American Caucasian
##   No      4                         1        16
##   Yes     2                         0        13
# TRUE rather than T: T is an ordinary variable that can be reassigned.
# NOTE(review): DPO.covid is presumably days relative to COVID onset - confirm.
median(subsetData$DPO.covid, na.rm = TRUE)
## [1] -282
range(subsetData$DPO.covid, na.rm = TRUE)
## [1] -359   78
# Samples shown in the per-subject sampling timeline.
temp <- mergedData[mergedData$Record.ID %in% keepList, ]

# Drop time points that are not displayed. %in% never yields NA, and a logical
# mask (unlike temp[-which(...), ]) does not empty the data frame when no row
# matches.
dropRow <- temp$timeCategory %in% c("2 wks Post 2nd dose", "") |
  temp$shortForm %in% c("5W", "4M")
temp <- temp[!dropRow, ]

temp$timeCategory <- factor(
  temp$timeCategory,
  levels = c("Baseline", "Post 1st dose", "two Weeks", "Pre 2nd dose",
             "Post 2nd dose", "One month post\n2nd dose")
) # ,"Four months post\n2nd dose"))

# Remove rows whose time category is not one of the levels above (factor()
# turns them into NA). The previous temp[-is.na(x), ] coerced the logical mask
# to 0 / -1 and therefore removed only the first row, never the NA rows.
temp <- temp[!is.na(temp$timeCategory), ]

# Sampling timeline: one horizontal track per subject, one point per sample,
# with a dashed reference line at the first vaccine dose (DPV = 0).
# NOTE(review): fill is mapped to timeCategory, but point shapes 18-20 have no
# fill in ggplot2 (only 21-25 do), so the first three levels may not show the
# viridis fill - confirm this is intended.
ggplot(
  data = temp,
  mapping = aes(x = DPV, y = Record.ID, group = Record.ID)
) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.5) +
  geom_path() +
  geom_point(aes(fill = timeCategory, shape = timeCategory), size = 4) +
  theme_bw() +
  xlab("Days relative to vaccine dose 1") +
  ylab("") +
  scale_shape_manual(values = 18:25) +
  scale_fill_viridis_d() +
  theme(
    axis.text = element_text(color = "black", size = 16),
    axis.title = element_text(color = "black", size = 16),
    axis.text.y = element_blank()
  ) +
  scale_x_continuous(breaks = seq(from = -100, to = 200, by = 20))

# ggsave(filename = "./Images/Subject_timecourse_overview.pdf")

# ---- FINAL DATA OBJECT ----

# Remove the "Alias" column. A logical mask replaces -which(...): if the
# column were absent, -integer(0) would select zero columns instead of all.
mergedData <- mergedData[, names(mergedData) != "Alias", drop = FALSE]
# saveRDS(mergedData, file = "mergedData_postExclusions.Rds")
# The saved post-exclusion object is reloaded, replacing the in-memory copy.
mergedData <- readRDS(file = "mergedData_postExclusions.Rds")

# ---- Activated T cells analyses ----

# Data for the activated T-cell analyses: drop the "two Weeks" and
# "2 wks post 2nd dose" samples and the two subjects without a Ki67 stain.
# NOTE(review): the label excluded here is "2 wks post 2nd dose" (lower-case
# "post"), whereas the timeline code filters "2 wks Post 2nd dose" - confirm
# which spelling the data actually uses.
subsetData <- subset(
  mergedData,
  timeCategory != "two Weeks" & timeCategory != "2 wks post 2nd dose"
)
subsetData <- subset(subsetData, Record.ID != "CV-011" & Record.ID != "CV-012") # absence of Ki67 stain
subsetData$timeCategory <- factor(
  subsetData$timeCategory,
  levels = c("Baseline", "Post 1st dose", "Pre 2nd dose", "Post 2nd dose",
             "One month post\n2nd dose")
)

# CD4 activation against DPV, zoomed to days -1..12 with a dashed line at 0.
# FALSE is spelled out because F is a reassignable variable.
prePostTime(
  subsetData,
  xData = "DPV", yData = "CD4_.CD38.Ki67._FreqParent",
  fillParam = "Prior.COVID.infection.", groupby = "Record.ID",
  title = "CD4 - Post 1st dose", xLabel = "Days", yLabel = "Ki67+CD38+ (% CD4)",
  repMeasures = FALSE, exponential = FALSE
) +
  coord_cartesian(xlim = c(-1, 12)) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.5)
## [1] "block2"
## Warning: Removed 10 row(s) containing missing values (geom_path).
## Warning: Removed 11 rows containing missing values (geom_point).

# ggsave(filename = "./Images/ActivCD4_bothCohorts_Vax1_continuousTime.pdf")
# CD8 activation against DPV, zoomed to days -1..12 with a dashed line at 0.
# FALSE is spelled out because F is a reassignable variable.
prePostTime(
  subsetData,
  xData = "DPV", yData = "CD8_.CD38.Ki67._FreqParent",
  fillParam = "Prior.COVID.infection.", groupby = "Record.ID",
  title = "CD8 - Post 1st dose", xLabel = "Days", yLabel = "Ki67+CD38+ (% CD8)",
  repMeasures = FALSE, exponential = FALSE
) +
  coord_cartesian(xlim = c(-1, 12)) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.5)
## [1] "block2"
## Warning: Removed 10 row(s) containing missing values (geom_path).

## Warning: Removed 11 rows containing missing values (geom_point).

# ggsave(filename = "./Images/ActivCD8_bothCohorts_Vax1_continuousTime.pdf")
# Re-reference DPV to the second dose by subtracting the number of days
# between the vaccine 1 and vaccine 2 dates. This overwrites subsetData$DPV.
daysBetweenDoses <- as.numeric(
  difftime(subsetData$Vaccine.2.date, subsetData$Vaccine.1.date, units = "days")
)
subsetData$DPV <- subsetData$DPV - daysBetweenDoses

# CD4 activation around dose 2, zoomed to days -5..12 with a dashed line at 0.
# FALSE is spelled out because F is a reassignable variable.
prePostTime(
  subsetData,
  xData = "DPV", yData = "CD4_.CD38.Ki67._FreqParent",
  fillParam = "Prior.COVID.infection.", groupby = "Record.ID",
  title = "CD4 - Post 2nd dose", xLabel = "Days", yLabel = "Ki67+CD38+ (% CD4)",
  repMeasures = FALSE, exponential = FALSE
) +
  coord_cartesian(xlim = c(-5, 12)) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.5)
## [1] "block2"
## Warning: Removed 10 row(s) containing missing values (geom_path).

## Warning: Removed 11 rows containing missing values (geom_point).

# ggsave(filename = "./Images/ActivCD4_bothCohorts_Vax2_continuousTime.pdf")
# CD8 activation against DPV, zoomed to days -5..12 with a dashed line at 0.
# FALSE is spelled out because F is a reassignable variable.
prePostTime(
  subsetData,
  xData = "DPV", yData = "CD8_.CD38.Ki67._FreqParent",
  fillParam = "Prior.COVID.infection.", groupby = "Record.ID",
  title = "CD8 - Post 2nd dose", xLabel = "Days", yLabel = "Ki67+CD38+ (% CD8)",
  repMeasures = FALSE, exponential = FALSE
) +
  coord_cartesian(xlim = c(-5, 12)) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.5)
## [1] "block2"
## Warning: Removed 10 row(s) containing missing values (geom_path).

## Warning: Removed 11 rows containing missing values (geom_point).

# ggsave(filename = "./Images/ActivCD8_bothCohorts_Vax2_continuousTime.pdf")

# ---- activated CD4 ----

# Activated CD4 frequency per time category for both cohorts; the plot is kept
# in `a` and printed. TRUE/FALSE are spelled out because T and F are
# reassignable variables.
a <- prePostTime(
  data = subsetData,
  xData = "timeCategory", yData = "CD4_.CD38.Ki67._FreqParent",
  fillParam = "Prior.COVID.infection.", groupby = "Record.ID",
  title = "Activated CD4", xLabel = " ", yLabel = "Ki67+CD38+ (% CD4)",
  repMeasures = FALSE, exponential = FALSE, newform = TRUE
)
a
## [1] "block3"
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)
## Warning: Removed 10 row(s) containing missing values (geom_path).
## Warning: Removed 11 rows containing missing values (geom_point).

# ggsave(filename = "./Images/ActivCD4_bothCohorts_overTime.pdf")

# Split the activated-T-cell data by prior-infection status once; the same
# cohort subsets feed every test below.
naiveCohort <- subset(subsetData, Prior.COVID.infection. == "No")
experiencedCohort <- subset(subsetData, Prior.COVID.infection. == "Yes")

# No prior COVID: Bartlett, Kruskal-Wallis, then pairwise Dunn tests of
# activated CD4 frequency across time categories.
bartlett.test(CD4_.CD38.Ki67._FreqParent ~ timeCategory, data = naiveCohort)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  CD4_.CD38.Ki67._FreqParent by timeCategory
## Bartlett's K-squared = 9.5434, df = 4, p-value = 0.04886
kruskal_test(formula = CD4_.CD38.Ki67._FreqParent ~ timeCategory, data = naiveCohort)
## # A tibble: 1 x 6
##   .y.                            n statistic    df        p method        
## * <chr>                      <int>     <dbl> <int>    <dbl> <chr>         
## 1 CD4_.CD38.Ki67._FreqParent    92      19.3     4 0.000691 Kruskal-Wallis
dunn_test(CD4_.CD38.Ki67._FreqParent ~ timeCategory, data = naiveCohort)
## # A tibble: 10 x 9
##    .y.      group1  group2       n1    n2 statistic       p   p.adj p.adj.signif
##  * <chr>    <chr>   <chr>     <int> <int>     <dbl>   <dbl>   <dbl> <chr>       
##  1 CD4_.CD~ Baseli~ "Post 1s~    17    19     3.50  4.63e-4 0.00417 **          
##  2 CD4_.CD~ Baseli~ "Pre 2nd~    17    18     1.83  6.74e-2 0.404   ns          
##  3 CD4_.CD~ Baseli~ "Post 2n~    17    18     2.81  5.01e-3 0.0401  *           
##  4 CD4_.CD~ Baseli~ "One mon~    17    16     3.88  1.05e-4 0.00105 **          
##  5 CD4_.CD~ Post 1~ "Pre 2nd~    19    18    -1.67  9.43e-2 0.471   ns          
##  6 CD4_.CD~ Post 1~ "Post 2n~    19    18    -0.668 5.04e-1 1       ns          
##  7 CD4_.CD~ Post 1~ "One mon~    19    16     0.536 5.92e-1 1       ns          
##  8 CD4_.CD~ Pre 2n~ "Post 2n~    18    18     0.992 3.21e-1 0.970   ns          
##  9 CD4_.CD~ Pre 2n~ "One mon~    18    16     2.13  3.31e-2 0.232   ns          
## 10 CD4_.CD~ Post 2~ "One mon~    18    16     1.17  2.42e-1 0.970   ns

# Prior COVID: Bartlett and pairwise Dunn tests only.
bartlett.test(CD4_.CD38.Ki67._FreqParent ~ timeCategory, data = experiencedCohort)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  CD4_.CD38.Ki67._FreqParent by timeCategory
## Bartlett's K-squared = 5.1362, df = 4, p-value = 0.2736
dunn_test(CD4_.CD38.Ki67._FreqParent ~ timeCategory, data = experiencedCohort)
## # A tibble: 10 x 9
##    .y.        group1   group2        n1    n2 statistic     p p.adj p.adj.signif
##  * <chr>      <chr>    <chr>      <int> <int>     <dbl> <dbl> <dbl> <chr>       
##  1 CD4_.CD38~ Baseline "Post 1st~    12    14   -0.817  0.414     1 ns          
##  2 CD4_.CD38~ Baseline "Pre 2nd ~    12    13   -1.26   0.208     1 ns          
##  3 CD4_.CD38~ Baseline "Post 2nd~    12    14   -1.26   0.209     1 ns          
##  4 CD4_.CD38~ Baseline "One mont~    12    14   -1.16   0.245     1 ns          
##  5 CD4_.CD38~ Post 1s~ "Pre 2nd ~    14    13   -0.473  0.636     1 ns          
##  6 CD4_.CD38~ Post 1s~ "Post 2nd~    14    14   -0.456  0.648     1 ns          
##  7 CD4_.CD38~ Post 1s~ "One mont~    14    14   -0.359  0.720     1 ns          
##  8 CD4_.CD38~ Pre 2nd~ "Post 2nd~    13    14    0.0260 0.979     1 ns          
##  9 CD4_.CD38~ Pre 2nd~ "One mont~    13    14    0.121  0.904     1 ns          
## 10 CD4_.CD38~ Post 2n~ "One mont~    14    14    0.0970 0.923     1 ns
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "CD4_.CD38.Ki67._FreqParent")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig1H.csv")

# Same activated-CD4 plot with prePostTime()'s recentCOVID option switched on.
# TRUE/FALSE are spelled out because T and F are reassignable variables.
prePostTime(
  data = subsetData,
  xData = "timeCategory", yData = "CD4_.CD38.Ki67._FreqParent",
  fillParam = "Prior.COVID.infection.", groupby = "Record.ID",
  title = "Activated CD4", xLabel = " ", yLabel = "Ki67+CD38+ (% CD4)",
  repMeasures = FALSE, exponential = FALSE, newform = TRUE, recentCOVID = TRUE
)
## [1] "block3.1 recentCOVID"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)
## Warning: Removed 10 row(s) containing missing values (geom_path).

## Warning: Removed 11 rows containing missing values (geom_point).

# ggsave(filename = "./Images/ActivCD4_bothCohorts_overTime_recentCOVID.pdf")

# ---- activated CD8 ----

# Activated CD8 frequency per time category for both cohorts, with the y axis
# limited to 0..5.5. TRUE/FALSE are spelled out because T and F are
# reassignable variables; the dangling "# +" after the last layer is removed.
prePostTime(
  data = subsetData,
  xData = "timeCategory", yData = "CD8_.CD38.Ki67._FreqParent",
  fillParam = "Prior.COVID.infection.", groupby = "Record.ID",
  title = "Activated CD8", xLabel = " ", yLabel = "Ki67+CD38+ (% CD8)",
  repMeasures = FALSE, exponential = FALSE, newform = TRUE
) +
  scale_y_continuous(breaks = seq(0, 10, 1), limits = c(0, 5.5))
## [1] "block3"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)
## Warning: Removed 10 row(s) containing missing values (geom_path).
## Warning: Removed 11 rows containing missing values (geom_point).

# ggsave(filename = "./Images/ActivCD8_bothCohorts_overTime.pdf")

# Split the activated-T-cell data by prior-infection status once; the same
# cohort subsets feed every test below.
naiveCohort <- subset(subsetData, Prior.COVID.infection. == "No")
experiencedCohort <- subset(subsetData, Prior.COVID.infection. == "Yes")

# No prior COVID: Bartlett, Kruskal-Wallis, then pairwise Dunn tests of
# activated CD8 frequency across time categories.
bartlett.test(CD8_.CD38.Ki67._FreqParent ~ timeCategory, data = naiveCohort)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  CD8_.CD38.Ki67._FreqParent by timeCategory
## Bartlett's K-squared = 56.445, df = 4, p-value = 1.617e-11
kruskal_test(formula = CD8_.CD38.Ki67._FreqParent ~ timeCategory, data = naiveCohort)
## # A tibble: 1 x 6
##   .y.                            n statistic    df         p method        
## * <chr>                      <int>     <dbl> <int>     <dbl> <chr>         
## 1 CD8_.CD38.Ki67._FreqParent    92      24.5     4 0.0000626 Kruskal-Wallis
dunn_test(CD8_.CD38.Ki67._FreqParent ~ timeCategory, data = naiveCohort)
## # A tibble: 10 x 9
##    .y.      group1  group2       n1    n2 statistic       p   p.adj p.adj.signif
##  * <chr>    <chr>   <chr>     <int> <int>     <dbl>   <dbl>   <dbl> <chr>       
##  1 CD8_.CD~ Baseli~ "Post 1s~    17    19    2.57   1.02e-2 8.17e-2 ns          
##  2 CD8_.CD~ Baseli~ "Pre 2nd~    17    18    2.14   3.21e-2 2.05e-1 ns          
##  3 CD8_.CD~ Baseli~ "Post 2n~    17    18    4.29   1.77e-5 1.77e-4 ***         
##  4 CD8_.CD~ Baseli~ "One mon~    17    16    4.14   3.48e-5 3.13e-4 ***         
##  5 CD8_.CD~ Post 1~ "Pre 2nd~    19    18   -0.403  6.87e-1 1.00e+0 ns          
##  6 CD8_.CD~ Post 1~ "Post 2n~    19    18    1.81   7.09e-2 2.84e-1 ns          
##  7 CD8_.CD~ Post 1~ "One mon~    19    16    1.72   8.50e-2 2.84e-1 ns          
##  8 CD8_.CD~ Pre 2n~ "Post 2n~    18    18    2.18   2.93e-2 2.05e-1 ns          
##  9 CD8_.CD~ Pre 2n~ "One mon~    18    16    2.09   3.69e-2 2.05e-1 ns          
## 10 CD8_.CD~ Post 2~ "One mon~    18    16   -0.0281 9.78e-1 1.00e+0 ns

# Prior COVID: the same three tests.
bartlett.test(CD8_.CD38.Ki67._FreqParent ~ timeCategory, data = experiencedCohort)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  CD8_.CD38.Ki67._FreqParent by timeCategory
## Bartlett's K-squared = 11.124, df = 4, p-value = 0.0252
kruskal_test(formula = CD8_.CD38.Ki67._FreqParent ~ timeCategory, data = experiencedCohort)
## # A tibble: 1 x 6
##   .y.                            n statistic    df       p method        
## * <chr>                      <int>     <dbl> <int>   <dbl> <chr>         
## 1 CD8_.CD38.Ki67._FreqParent    74      16.0     4 0.00298 Kruskal-Wallis
dunn_test(CD8_.CD38.Ki67._FreqParent ~ timeCategory, data = experiencedCohort)
## # A tibble: 10 x 9
##    .y.      group1  group2       n1    n2 statistic       p   p.adj p.adj.signif
##  * <chr>    <chr>   <chr>     <int> <int>     <dbl>   <dbl>   <dbl> <chr>       
##  1 CD8_.CD~ Baseli~ "Post 1s~    12    14    1.26   2.08e-1 1       ns          
##  2 CD8_.CD~ Baseli~ "Pre 2nd~    12    13    0.817  4.14e-1 1       ns          
##  3 CD8_.CD~ Baseli~ "Post 2n~    12    14    3.39   6.89e-4 0.00620 **          
##  4 CD8_.CD~ Baseli~ "One mon~    12    14    0.0901 9.28e-1 1       ns          
##  5 CD8_.CD~ Post 1~ "Pre 2nd~    14    13   -0.438  6.62e-1 1       ns          
##  6 CD8_.CD~ Post 1~ "Post 2n~    14    14    2.22   2.63e-2 0.184   ns          
##  7 CD8_.CD~ Post 1~ "One mon~    14    14   -1.22   2.23e-1 1       ns          
##  8 CD8_.CD~ Pre 2n~ "Post 2n~    13    14    2.62   8.86e-3 0.0709  ns          
##  9 CD8_.CD~ Pre 2n~ "One mon~    13    14   -0.757  4.49e-1 1       ns          
## 10 CD8_.CD~ Post 2~ "One mon~    14    14   -3.44   5.84e-4 0.00584 **
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "CD8_.CD38.Ki67._FreqParent")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig1F.csv")



# Same activated-CD8 plot with prePostTime()'s recentCOVID option switched on.
# TRUE/FALSE are spelled out because T and F are reassignable variables.
prePostTime(
  data = subsetData,
  xData = "timeCategory", yData = "CD8_.CD38.Ki67._FreqParent",
  fillParam = "Prior.COVID.infection.", groupby = "Record.ID",
  title = "Activated CD8", xLabel = " ", yLabel = "Ki67+CD38+ (% CD8)",
  repMeasures = FALSE, exponential = FALSE, newform = TRUE, recentCOVID = TRUE
)
## [1] "block3.1 recentCOVID"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)
## Warning: Removed 10 row(s) containing missing values (geom_path).

## Warning: Removed 11 rows containing missing values (geom_point).

# ggsave(filename = "./Images/ActivCD8_bothCohorts_overTime_recentCOVID.pdf")

# ---- GzmB CD8 analyses ----

# Rebuild the analysis subset from mergedData for the GzmB read-out, excluding
# the "two Weeks" and "2 wks post 2nd dose" samples. Unlike the activated
# T-cell subset, CV-011 and CV-012 are not removed here.
subsetData <- subset(
  mergedData,
  timeCategory != "two Weeks" & timeCategory != "2 wks post 2nd dose"
)
subsetData$timeCategory <- factor(
  subsetData$timeCategory,
  levels = c("Baseline", "Post 1st dose", "Pre 2nd dose", "Post 2nd dose",
             "One month post\n2nd dose")
)

# GzmB+ fraction of activated CD8 cells per time category, y axis 0..110.
# TRUE/FALSE are spelled out because T and F are reassignable variables; the
# dangling "# +" after the last layer is removed.
prePostTime(
  data = subsetData,
  xData = "timeCategory", yData = "CD8_.CD38.Ki67..GzmB..CD8_FreqParent",
  fillParam = "Prior.COVID.infection.", groupby = "Record.ID",
  title = "Activated CD8", xLabel = " ", yLabel = "GzmB+ (% CD8+Ki67+CD38+)",
  repMeasures = FALSE, newform = TRUE
) +
  scale_y_continuous(limits = c(0, 110), breaks = seq(0, 100, 10))
## [1] "block3"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)
## Warning: Removed 10 row(s) containing missing values (geom_path).
## Warning: Removed 11 rows containing missing values (geom_point).

# ggsave(filename = "./Images/CD8_CD38hiKi67hi_GzmB.pdf" )

# Split the GzmB data by prior-infection status once; the same cohort subsets
# feed every test below.
naiveCohort <- subset(subsetData, Prior.COVID.infection. == "No")
experiencedCohort <- subset(subsetData, Prior.COVID.infection. == "Yes")

# Bartlett tests for each cohort.
bartlett.test(CD8_.CD38.Ki67..GzmB..CD8_FreqParent ~ timeCategory, data = naiveCohort)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  CD8_.CD38.Ki67..GzmB..CD8_FreqParent by timeCategory
## Bartlett's K-squared = 2.4068, df = 4, p-value = 0.6614
bartlett.test(CD8_.CD38.Ki67..GzmB..CD8_FreqParent ~ timeCategory, data = experiencedCohort)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  CD8_.CD38.Ki67..GzmB..CD8_FreqParent by timeCategory
## Bartlett's K-squared = 2.5524, df = 4, p-value = 0.6353

# Pairwise Dunn tests between time categories for each cohort.
dunn_test(CD8_.CD38.Ki67..GzmB..CD8_FreqParent ~ timeCategory, data = naiveCohort)
## # A tibble: 10 x 9
##    .y.        group1  group2      n1    n2 statistic       p  p.adj p.adj.signif
##  * <chr>      <chr>   <chr>    <int> <int>     <dbl>   <dbl>  <dbl> <chr>       
##  1 CD8_.CD38~ Baseli~ "Post 1~    19    21   -0.0926 0.926   1      ns          
##  2 CD8_.CD38~ Baseli~ "Pre 2n~    19    20   -0.700  0.484   1      ns          
##  3 CD8_.CD38~ Baseli~ "Post 2~    19    20    2.47   0.0134  0.0938 ns          
##  4 CD8_.CD38~ Baseli~ "One mo~    19    18   -0.650  0.515   1      ns          
##  5 CD8_.CD38~ Post 1~ "Pre 2n~    21    20   -0.624  0.533   1      ns          
##  6 CD8_.CD38~ Post 1~ "Post 2~    21    20    2.63   0.00856 0.0684 ns          
##  7 CD8_.CD38~ Post 1~ "One mo~    21    18   -0.575  0.565   1      ns          
##  8 CD8_.CD38~ Pre 2n~ "Post 2~    20    20    3.21   0.00131 0.0131 *           
##  9 CD8_.CD38~ Pre 2n~ "One mo~    20    18    0.0316 0.975   1      ns          
## 10 CD8_.CD38~ Post 2~ "One mo~    20    18   -3.10   0.00196 0.0176 *
dunn_test(CD8_.CD38.Ki67..GzmB..CD8_FreqParent ~ timeCategory, data = experiencedCohort)
## # A tibble: 10 x 9
##    .y.       group1  group2      n1    n2 statistic       p   p.adj p.adj.signif
##  * <chr>     <chr>   <chr>    <int> <int>     <dbl>   <dbl>   <dbl> <chr>       
##  1 CD8_.CD3~ Baseli~ "Post 1~    12    14    2.88   3.97e-3 0.0278  *           
##  2 CD8_.CD3~ Baseli~ "Pre 2n~    12    13    0.0941 9.25e-1 1       ns          
##  3 CD8_.CD3~ Baseli~ "Post 2~    12    14    3.58   3.39e-4 0.00339 **          
##  4 CD8_.CD3~ Baseli~ "One mo~    12    14    0.341  7.33e-1 1       ns          
##  5 CD8_.CD3~ Post 1~ "Pre 2n~    14    13   -2.84   4.45e-3 0.0278  *           
##  6 CD8_.CD3~ Post 1~ "Post 2~    14    14    0.732  4.64e-1 1       ns          
##  7 CD8_.CD3~ Post 1~ "One mo~    14    14   -2.64   8.22e-3 0.0411  *           
##  8 CD8_.CD3~ Pre 2n~ "Post 2~    13    14    3.56   3.67e-4 0.00339 **          
##  9 CD8_.CD3~ Pre 2n~ "One mo~    13    14    0.250  8.02e-1 1       ns          
## 10 CD8_.CD3~ Post 2~ "One mo~    14    14   -3.38   7.37e-4 0.00590 **
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "CD8_.CD38.Ki67..GzmB..CD8_FreqParent")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs1H.csv")

#———————————— Age correlations with activated CD4 responses ——————————————

# Kendall correlation of Age with the activated CD4/CD8 frequencies for one
# cohort at one time category.
#
# data:       data frame with Prior.COVID.infection., timeCategory, Age and
#             the *_.CD38.Ki67._FreqParent columns.
# priorCovid: value of Prior.COVID.infection. to keep ("No" or "Yes"); also
#             stored in the Prior.COVID column of the result.
# timePoint:  value of timeCategory to keep.
#
# Returns the correlation matrix as a data frame with extra columns Row.names,
# Labels, Prior.COVID and Pvalue (the p-value of each variable against Age).
ageCorrelationTable <- function(data, priorCovid, timePoint) {
  columnPattern <- paste(
    c("CD8_.CD38.Ki67._FreqParent", "CD4_.CD38.Ki67._FreqParent", "Age"),
    collapse = "|"
  )
  # which() drops rows where the condition is NA, as subset() does.
  inCohort <- which(data$Prior.COVID.infection. == priorCovid &
                      data$timeCategory == timePoint)
  cohort <- data[inCohort, grep(columnPattern, names(data)), drop = FALSE]

  tau <- as.data.frame(cor(cohort, method = "kendall", use = "pairwise.complete.obs"))
  pValues <- ggcorrplot::cor_pmat(cohort, method = "kendall", use = "pairwise.complete.obs")

  tau$Labels <- row.names(tau)
  tau$Prior.COVID <- priorCovid

  # merge() names the unnamed p-value vector "y". Rename exactly that column;
  # the previous grep("y", ...) would also hit any other name containing "y".
  result <- merge(x = tau, y = pValues[, "Age"], by = "row.names")
  names(result)[names(result) == "y"] <- "Pvalue"
  result
}

# Stack the two cohorts and drop the Age-vs-Age row. grepl() is used instead
# of -grep(): a negative index built from zero matches would return no rows.
stackCohorts <- function(naive, experienced) {
  stacked <- as.data.frame(rbind(naive, experienced))
  stacked[!grepl("Age", stacked$Row.names), ]
}

# ---- Post 1st dose ----
cor.matrix <- ageCorrelationTable(mergedData, "No", "Post 1st dose")
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
cor.matrix2 <- ageCorrelationTable(mergedData, "Yes", "Post 1st dose")
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
temp <- stackCohorts(cor.matrix, cor.matrix2)

# ---- Post 2nd dose ----
cor.matrix <- ageCorrelationTable(mergedData, "No", "Post 2nd dose")
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
cor.matrix2 <- ageCorrelationTable(mergedData, "Yes", "Post 2nd dose")
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
temp2 <- stackCohorts(cor.matrix, cor.matrix2)

# Label each time point, keep only the plotted columns, and combine.
temp[, "Labels"] <- "Frequency\npost 1st dose"
temp2[, "Labels"] <- "Frequency\npost 2nd dose"
plotColumns <- c("Row.names", "Age", "Labels", "Prior.COVID", "Pvalue")
temp <- temp[, plotColumns]
temp2 <- temp2[, plotColumns]
temp <- as.data.frame(rbind(temp, temp2))

# The level order also lists two fold-change labels that no row built above
# carries.
temp$Labels <- factor(
  temp$Labels,
  levels = c("Fold-change\npost 2nd dose", "Fold-change\npost 1st dose",
             "Frequency\npost 2nd dose", "Frequency\npost 1st dose")
)
# Lollipop plot for the CD4 Ki67+CD38+ rows of `temp`: the `Age` column (the
# correlation with age) is drawn per label, dodged by prior-COVID group.
a <- ggplot(
  data = subset(temp, Row.names == "CD4_.CD38.Ki67._FreqParent"),
  mapping = aes(x = Labels, y = Age, fill = Prior.COVID)
) +
  # very narrow bars act as the lollipop stems
  geom_bar(
    stat = "identity",
    position = position_dodge(width = 0.5),
    width = 0.05, color = "black", size = 0.1
  ) +
  # point area encodes the p-value; range = c(18, 1) makes small p-values large
  geom_point(
    aes(fill = Prior.COVID, size = Pvalue),
    pch = 21, color = "black", stroke = 0.2,
    position = position_dodge(width = 0.5)
  ) +
  theme_bw() +
  scale_size(
    range = c(18, 1),
    breaks = c(0, 0.05, 0.1, 0.2, 0.7),
    limits = c(0, 0.8),
    trans = "pseudo_log"
  ) +
  guides(size = guide_legend(reverse = TRUE)) +
  scale_fill_manual(values = c("#FFC26A", "#B5B2F1")) +
  ylab("Kendall's tau vs Age") +
  xlab(" ") +
  ggtitle("CD4+Ki67+CD38+") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme(
    axis.text.y = element_text(size = 20, color = "black"),
    plot.title = element_text(size = 20),
    axis.text.x = element_text(size = 20, color = "black", angle = 45, hjust = 1, vjust = 1),
    axis.title.x = element_text(size = 20, color = "black")
  ) +
  coord_flip() +
  scale_y_continuous(limits = c(-1, 0.5), breaks = seq(-1, 1, 0.25))
a

# ggsave(filename = "./Images/Age_CD4correlations_lollipop.pdf", width=5, height = 5)
# Lollipop plot for the CD8 Ki67+CD38+ rows of `temp`: the `Age` column (the
# correlation with age) is drawn per label, dodged by prior-COVID group.
# NOTE(review): the size scale is limited to [0, 0.8] and the tau axis to
# [-1, 0.5]; values outside those limits are presumably what triggers the
# "Removed 1 rows containing missing values (geom_point)" warning - confirm.
b <- ggplot(
  data = subset(temp, Row.names == "CD8_.CD38.Ki67._FreqParent"),
  mapping = aes(x = Labels, y = Age, fill = Prior.COVID)
) +
  # very narrow bars act as the lollipop stems
  geom_bar(
    stat = "identity",
    position = position_dodge(width = 0.5),
    width = 0.05, color = "black", size = 0.1
  ) +
  # point area encodes the p-value; range = c(18, 1) makes small p-values large
  geom_point(
    aes(fill = Prior.COVID, size = Pvalue),
    pch = 21, color = "black", stroke = 0.2,
    position = position_dodge(width = 0.5)
  ) +
  theme_bw() +
  scale_size(
    range = c(18, 1),
    breaks = c(0, 0.05, 0.1, 0.2, 0.7),
    limits = c(0, 0.8),
    trans = "pseudo_log"
  ) +
  guides(size = guide_legend(reverse = TRUE)) +
  scale_fill_manual(values = c("#FFC26A", "#B5B2F1")) +
  ylab("Kendall's tau vs Age") +
  xlab(" ") +
  ggtitle("CD8+Ki67+CD38+") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme(
    axis.text.y = element_text(size = 20, color = "black"),
    plot.title = element_text(size = 20),
    axis.text.x = element_text(size = 20, color = "black", angle = 45, hjust = 1, vjust = 1),
    axis.title.x = element_text(size = 20, color = "black"),
    legend.text = element_text(size = 20),
    legend.title = element_text(size = 20)
  ) +
  coord_flip() +
  scale_y_continuous(limits = c(-1, 0.5), breaks = seq(-1, 1, 0.25))
b
## Warning: Removed 1 rows containing missing values (geom_point).

# ggsave(filename = "./Images/Age_CD8correlations_lollipop.pdf", width=5, height = 5)
# Combine both lollipop plots side by side: the left panel loses its legend and
# the right panel its category labels, so they read as one figure.
a <- a + theme(legend.position = "none")
b <- b + theme(axis.text.y = element_blank())
# The right panel is slightly wider because it still carries the legend.
grid.arrange(a, b, nrow = 1, widths = c(1, 1.1))
## Warning: Removed 1 rows containing missing values (geom_point).

# ggsave( plot = arrangeGrob(a,b, nrow=1, widths = c(1,1)), filename =  "./Images/Age_Tcellcorrelations_lollipop.pdf", width=9, height=6)

# Correlational analyses ----

# Fold-change of Ki67+CD38+ CD4 vs CD8 T cells after the 1st dose, comparing
# rows without ("Naive") and with ("Experienced") prior COVID infection.
# bivScatter() is not defined in this section; presumably it comes from the
# sourced plotting-functions file.
bivScatter(
  data1 = subset(mergedData, Prior.COVID.infection. == "No" & timeCategory == "Post 1st dose" & Tube == "HEP"),
  name1 = "Naive",
  data2 = subset(mergedData, Prior.COVID.infection. == "Yes" & timeCategory == "Post 1st dose" & Tube == "HEP"),
  name2 = "Experienced",
  xData = "FCActivCD4_Vax1", yData = "FCActivCD8_Vax1",
  fillParam = "Prior.COVID.infection.", title = "Post 1st dose",
  xLabel = "Fold-change CD4+Ki67+CD38+", yLabel = "Fold-change CD8+Ki67+CD38+",
  # TRUE rather than T: T is an ordinary variable that can be reassigned
  nonparam = TRUE
) +
  # pseudo-log axes with major breaks at powers of ten and minor breaks at 5 * 10^k
  scale_x_continuous(
    trans = "pseudo_log", breaks = c(10^(0:4)),
    labels = trans_format("log10", math_format(10^.x)), minor_breaks = 5 * 10^(0:10)
  ) +
  scale_y_continuous(
    trans = "pseudo_log", breaks = c(10^(0:4)),
    labels = trans_format("log10", math_format(10^.x)), minor_breaks = 5 * 10^(0:10)
  )
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).

## Warning: Removed 2 rows containing missing values (geom_point).

# ggsave(filename = "./Images/FCActivCD4_correl_FCActivCD8_Vax1.pdf", width=8)
# Fold-change of Ki67+CD38+ CD4 vs CD8 T cells after the 2nd dose, comparing
# rows without ("Naive") and with ("Experienced") prior COVID infection.
# bivScatter() is not defined in this section; presumably it comes from the
# sourced plotting-functions file.
bivScatter(
  data1 = subset(mergedData, Prior.COVID.infection. == "No" & timeCategory == "Post 2nd dose" & Tube == "HEP"),
  name1 = "Naive",
  data2 = subset(mergedData, Prior.COVID.infection. == "Yes" & timeCategory == "Post 2nd dose" & Tube == "HEP"),
  name2 = "Experienced",
  xData = "FCActivCD4_Vax2", yData = "FCActivCD8_Vax2",
  fillParam = "Prior.COVID.infection.", title = "Post 2nd dose",
  xLabel = "Fold-change CD4+Ki67+CD38+", yLabel = "Fold-change CD8+Ki67+CD38+",
  # TRUE rather than T: T is an ordinary variable that can be reassigned
  nonparam = TRUE
) +
  # pseudo-log axes with major breaks at powers of ten and minor breaks at 5 * 10^k
  scale_x_continuous(
    trans = "pseudo_log", breaks = c(10^(0:4)),
    labels = trans_format("log10", math_format(10^.x)), minor_breaks = 5 * 10^(0:10)
  ) +
  # the y axis is additionally restricted to [0, 200]
  scale_y_continuous(
    trans = "pseudo_log", limits = c(0, 200), breaks = c(10^(0:4)),
    labels = trans_format("log10", math_format(10^.x)), minor_breaks = 5 * 10^(0:10)
  )
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (geom_point).

# ggsave(filename = "./Images/FCActivCD4_correl_FCActivCD8_Vax2.pdf", width=8)


# Kendall correlation heat map of all fold-change columns (names starting with
# "FCActiv") for the prior-COVID "No" rows at "Post 1st dose".
subsetData <- subset(
  mergedData,
  Prior.COVID.infection. == "No" & timeCategory == "Post 1st dose"
)
subsetData <- subsetData[, grep("^FCActiv", names(subsetData))]
cor.elispots <- cor(subsetData, use = "pairwise.complete.obs", method = "kendall")
cor.elispots.pmat <- ggcorrplot::cor_pmat(
  subsetData,
  use = "pairwise.complete.obs",
  method = "kendall"
)
# insig = "blank" leaves cells empty when their p-value is not significant
ggcorrplot::ggcorrplot(
  corr = cor.elispots,
  p.mat = cor.elispots.pmat,
  title = "Fold-change responses - Naive",
  legend.title = "Kendall tau",
  insig = "blank",
  tl.cex = 20
)

# ggsave(filename = "./Images/Fold-changes_ggcorrplot_Naive.pdf")

# Kendall correlation heat map of all fold-change columns (names starting with
# "FCActiv") for the prior-COVID "Yes" rows at "Post 1st dose".
subsetData <- subset(
  mergedData,
  Prior.COVID.infection. == "Yes" & timeCategory == "Post 1st dose"
)
subsetData <- subsetData[, grep("^FCActiv", names(subsetData))]
cor.elispots <- cor(subsetData, use = "pairwise.complete.obs", method = "kendall")
cor.elispots.pmat <- ggcorrplot::cor_pmat(
  subsetData,
  use = "pairwise.complete.obs",
  method = "kendall"
)
# insig = "blank" leaves cells empty when their p-value is not significant
ggcorrplot::ggcorrplot(
  corr = cor.elispots,
  p.mat = cor.elispots.pmat,
  title = "Fold-change responses - Experienced",
  legend.title = "Kendall tau",
  insig = "blank",
  tl.cex = 20
)

# ggsave(filename = "./Images/Fold-changes_ggcorrplot_Experienced.pdf")


# Age vs Ki67+CD38+ frequency of CD4 T cells after the 1st dose, comparing rows
# without ("Naive") and with ("Experienced") prior COVID infection.
# bivScatter() is not defined in this section; presumably it comes from the
# sourced plotting-functions file.
bivScatter(
  data1 = subset(mergedData, Prior.COVID.infection. == "No" & timeCategory == "Post 1st dose" & Tube == "HEP"),
  name1 = "Naive",
  data2 = subset(mergedData, Prior.COVID.infection. == "Yes" & timeCategory == "Post 1st dose" & Tube == "HEP"),
  name2 = "Experienced",
  xData = "Age", yData = "CD4_.CD38.Ki67._FreqParent",
  fillParam = "Prior.COVID.infection.", title = "Post 1st dose",
  xLabel = "Age", yLabel = "Ki67+CD38+ (% CD4)",
  # TRUE rather than T: T is an ordinary variable that can be reassigned
  nonparam = TRUE
) +
  scale_x_continuous(limits = c(20, 70), breaks = seq(0, 100, 10)) +
  scale_y_continuous(limits = c(0, 5), breaks = seq(0, 10, 1))
## Warning in cor.test.default(data1[, xData], data1[, yData], method = "kendall"):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(data2[, xData], data2[, yData], method = "kendall"):
## Cannot compute exact p-value with ties
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

# ggsave(filename = "./Images/ActivCD4_correl_Age_Vax1.pdf", width=8)

# Age vs Ki67+CD38+ frequency of CD4 T cells after the 2nd dose, comparing rows
# without ("Naive") and with ("Experienced") prior COVID infection.
# bivScatter() is not defined in this section; presumably it comes from the
# sourced plotting-functions file.
bivScatter(
  data1 = subset(mergedData, Prior.COVID.infection. == "No" & timeCategory == "Post 2nd dose" & Tube == "HEP"),
  name1 = "Naive",
  data2 = subset(mergedData, Prior.COVID.infection. == "Yes" & timeCategory == "Post 2nd dose" & Tube == "HEP"),
  name2 = "Experienced",
  xData = "Age", yData = "CD4_.CD38.Ki67._FreqParent",
  fillParam = "Prior.COVID.infection.", title = "Post 2nd dose",
  xLabel = "Age", yLabel = "Ki67+CD38+ (% CD4)",
  # TRUE rather than T: T is an ordinary variable that can be reassigned
  nonparam = TRUE
) +
  scale_x_continuous(limits = c(20, 70), breaks = seq(0, 100, 10)) +
  scale_y_continuous(limits = c(0, 5), breaks = seq(0, 10, 1))
## Warning in cor.test.default(data1[, xData], data1[, yData], method = "kendall"):
## Cannot compute exact p-value with ties

## Warning in cor.test.default(data1[, xData], data1[, yData], method = "kendall"):
## Cannot compute exact p-value with ties
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

# ggsave(filename = "./Images/ActivCD4_correl_Age_Vax2.pdf", width=8)


# Age vs Ki67+CD38+ frequency of CD8 T cells after the 1st dose, comparing rows
# without ("Naive") and with ("Experienced") prior COVID infection.
# bivScatter() is not defined in this section; presumably it comes from the
# sourced plotting-functions file.
bivScatter(
  data1 = subset(mergedData, Prior.COVID.infection. == "No" & timeCategory == "Post 1st dose" & Tube == "HEP"),
  name1 = "Naive",
  data2 = subset(mergedData, Prior.COVID.infection. == "Yes" & timeCategory == "Post 1st dose" & Tube == "HEP"),
  name2 = "Experienced",
  xData = "Age", yData = "CD8_.CD38.Ki67._FreqParent",
  fillParam = "Prior.COVID.infection.", title = "Post 1st dose",
  xLabel = "Age", yLabel = "Ki67+CD38+ (% CD8)",
  # TRUE rather than T: T is an ordinary variable that can be reassigned
  nonparam = TRUE
) +
  scale_x_continuous(limits = c(20, 70), breaks = seq(0, 100, 10)) +
  scale_y_continuous(limits = c(0, 5), breaks = seq(0, 10, 1))
## Warning in cor.test.default(data1[, xData], data1[, yData], method = "kendall"):
## Cannot compute exact p-value with ties

## Warning in cor.test.default(data1[, xData], data1[, yData], method = "kendall"):
## Cannot compute exact p-value with ties
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

# ggsave(filename = "./Images/ActivCD8_correl_Age_Vax1.pdf", width=8)

# Age vs Ki67+CD38+ frequency of CD8 T cells after the 2nd dose, comparing rows
# without ("Naive") and with ("Experienced") prior COVID infection.
# bivScatter() is not defined in this section; presumably it comes from the
# sourced plotting-functions file.
bivScatter(
  data1 = subset(mergedData, Prior.COVID.infection. == "No" & timeCategory == "Post 2nd dose" & Tube == "HEP"),
  name1 = "Naive",
  data2 = subset(mergedData, Prior.COVID.infection. == "Yes" & timeCategory == "Post 2nd dose" & Tube == "HEP"),
  name2 = "Experienced",
  xData = "Age", yData = "CD8_.CD38.Ki67._FreqParent",
  fillParam = "Prior.COVID.infection.", title = "Post 2nd dose",
  xLabel = "Age", yLabel = "Ki67+CD38+ (% CD8)",
  # TRUE rather than T: T is an ordinary variable that can be reassigned
  nonparam = TRUE
) +
  # NOTE(review): the y axis is capped at 5, which presumably explains the
  # "Removed 1 rows" warnings printed for this plot - confirm.
  scale_x_continuous(limits = c(20, 70), breaks = seq(0, 100, 10)) +
  scale_y_continuous(limits = c(0, 5), breaks = seq(0, 10, 1))
## Warning in cor.test.default(data1[, xData], data1[, yData], method = "kendall"):
## Cannot compute exact p-value with ties

## Warning in cor.test.default(data1[, xData], data1[, yData], method = "kendall"):
## Cannot compute exact p-value with ties
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

# ggsave(filename = "./Images/ActivCD8_correl_Age_Vax2.pdf", width=8)

# AIM analyses ----

# Working data for the AIM analyses: every row of mergedData except the
# "two Weeks" and "2 wks post 2nd dose" time categories.
subsetData <- subset(
  mergedData,
  !(timeCategory == "two Weeks" | timeCategory == "2 wks post 2nd dose")
)

# CD4 analyses ----

# CD69+CD200+ frequency of CD4 T cells over the time categories, filled by
# prior-COVID status and grouped per Record.ID, on a log10 y axis.
# prePostTime() is not defined in this section; presumably it comes from the
# sourced plotting-functions file.
prePostTime(
  subsetData,
  xData = "timeCategory", yData = "AIM_CD4.CD69.CD200._FreqParent",
  fillParam = "Prior.COVID.infection.", groupby = "Record.ID",
  title = "CD69+CD200+ CD4", xLabel = " ", yLabel = "CD69+CD200+ (% CD4)",
  # TRUE/FALSE rather than T/F: T and F are ordinary, reassignable variables
  repMeasures = FALSE, exponential = FALSE, newform = TRUE
) +
  scale_y_continuous(trans = "log10", limits = c(0.002, 0.8))
## [1] "block3"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)
## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 118 row(s) containing missing values (geom_path).
## Warning: Removed 118 rows containing missing values (geom_point).

# ggsave(filename = "./Images/AIM_CD4_CD69CD200_overTime.pdf")
# Bartlett's test for equal variances of the CD69+CD200+ CD4 frequency across
# time categories; both prior-infection groups are pooled.
bartlett.test(
  AIM_CD4.CD69.CD200._FreqParent ~ timeCategory,
  data = subsetData
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  AIM_CD4.CD69.CD200._FreqParent by timeCategory
## Bartlett's K-squared = 49.709, df = 4, p-value = 4.154e-10
# Kruskal-Wallis test of the CD69+CD200+ CD4 frequency across time categories,
# restricted to rows without prior COVID infection.
kruskal_test(
  data = subset(subsetData, Prior.COVID.infection. == "No"),
  formula = AIM_CD4.CD69.CD200._FreqParent ~ timeCategory
)
## # A tibble: 1 x 6
##   .y.                                n statistic    df       p method        
## * <chr>                          <int>     <dbl> <int>   <dbl> <chr>         
## 1 AIM_CD4.CD69.CD200._FreqParent   102      17.1     4 0.00184 Kruskal-Wallis
# Pairwise Dunn comparisons between time categories for the CD69+CD200+ CD4
# frequency, rows without prior COVID infection. p.adj uses the function's
# default adjustment (Holm in rstatix).
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == "No"),
  formula = AIM_CD4.CD69.CD200._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.       group1  group2       n1    n2 statistic       p  p.adj p.adj.signif
##  * <chr>     <chr>   <chr>     <int> <int>     <dbl>   <dbl>  <dbl> <chr>       
##  1 AIM_CD4.~ Baseli~ "Post 1s~     6     6     0.312 0.755   1      ns          
##  2 AIM_CD4.~ Baseli~ "Pre 2nd~     6     6     2.53  0.0116  0.0809 ns          
##  3 AIM_CD4.~ Baseli~ "Post 2n~     6     6     2.77  0.00558 0.0502 ns          
##  4 AIM_CD4.~ Baseli~ "One mon~     6     6     3.08  0.00205 0.0205 *           
##  5 AIM_CD4.~ Post 1~ "Pre 2nd~     6     6     2.21  0.0268  0.134  ns          
##  6 AIM_CD4.~ Post 1~ "Post 2n~     6     6     2.46  0.0139  0.0834 ns          
##  7 AIM_CD4.~ Post 1~ "One mon~     6     6     2.77  0.00558 0.0502 ns          
##  8 AIM_CD4.~ Pre 2n~ "Post 2n~     6     6     0.246 0.806   1      ns          
##  9 AIM_CD4.~ Pre 2n~ "One mon~     6     6     0.558 0.577   1      ns          
## 10 AIM_CD4.~ Post 2~ "One mon~     6     6     0.312 0.755   1      ns
# Pairwise Dunn comparisons between time categories for the CD69+CD200+ CD4
# frequency, rows with prior COVID infection. p.adj uses the function's default
# adjustment (Holm in rstatix).
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == "Yes"),
  formula = AIM_CD4.CD69.CD200._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.        group1   group2       n1    n2 statistic      p p.adj p.adj.signif
##  * <chr>      <chr>    <chr>     <int> <int>     <dbl>  <dbl> <dbl> <chr>       
##  1 AIM_CD4.C~ Baseline "Post 1s~     4     6    1.37   0.172  1     ns          
##  2 AIM_CD4.C~ Baseline "Pre 2nd~     4     6    1.32   0.187  1     ns          
##  3 AIM_CD4.C~ Baseline "Post 2n~     4     6    1.07   0.285  1     ns          
##  4 AIM_CD4.C~ Baseline "One mon~     4     6    1.74   0.0812 0.812 ns          
##  5 AIM_CD4.C~ Post 1s~ "Pre 2nd~     6     6   -0.0527 0.958  1     ns          
##  6 AIM_CD4.C~ Post 1s~ "Post 2n~     6     6   -0.334  0.739  1     ns          
##  7 AIM_CD4.C~ Post 1s~ "One mon~     6     6    0.422  0.673  1     ns          
##  8 AIM_CD4.C~ Pre 2nd~ "Post 2n~     6     6   -0.281  0.779  1     ns          
##  9 AIM_CD4.C~ Pre 2nd~ "One mon~     6     6    0.474  0.635  1     ns          
## 10 AIM_CD4.C~ Post 2n~ "One mon~     6     6    0.755  0.450  1     ns
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "AIM_CD4.CD69.CD200._FreqParent")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig2D.csv")



# Fold-change of the CD69+CD200+ CD4 frequency from "Baseline" to "One month
# post 2nd dose": dcast() spreads the two time categories into columns, one row
# per Record.ID / prior-infection combination.
subsetData <- subset(
  mergedData,
  timeCategory == "Baseline" | timeCategory == "One month post\n2nd dose"
)
FC_response2 <- dcast(
  subsetData,
  Record.ID + Prior.COVID.infection. ~ timeCategory,
  value.var = c("AIM_CD4.CD69.CD200._FreqParent")
)
FC_response2$FoldChange <- FC_response2[["One month post\n2nd dose"]] / FC_response2[["Baseline"]]
FC_response2$Cohort <- NULL
# Only infinite ratios are dropped here.
# NOTE(review): is.infinite() is FALSE for NA/NaN, so rows with a missing
# fold-change stay in the table - confirm this is intended.
FC_response2 <- FC_response2[!is.infinite(FC_response2$FoldChange), ]
# Summary statistics per prior-infection group.
get_summary_stats(
  group_by(FC_response2, Prior.COVID.infection.),
  type = "common"
)
## # A tibble: 6 x 11
##   Prior.COVID.inf~ variable     n   min    max median   iqr   mean    sd    se
##   <chr>            <chr>    <dbl> <dbl>  <dbl>  <dbl> <dbl>  <dbl> <dbl> <dbl>
## 1 No               "Baseli~     5 0.006  0.04   0.013 0.016  0.018 0.014 0.006
## 2 No               "FoldCh~     5 2.25  26.9    9.22  7.96  11.3   9.66  4.32 
## 3 No               "One mo~     5 0.029  0.35   0.09  0.233  0.171 0.147 0.066
## 4 Yes              "Baseli~     4 0.013  0.027  0.024 0.007  0.022 0.006 0.003
## 5 Yes              "FoldCh~     3 0.963  2.57   2.15  0.804  1.90  0.835 0.482
## 6 Yes              "One mo~     6 0.025  0.056  0.04  0.026  0.04  0.015 0.006
## # ... with 1 more variable: ci <dbl>
# Wilcoxon rank-sum comparison of the fold-change between prior-infection groups.
# NOTE(review): the n1/n2 printed for this call are larger than the FoldChange
# counts in the preceding summary, presumably because rows with a missing
# fold-change are still in FC_response2 - confirm before reporting group sizes.
wilcox_test(
  formula = FoldChange ~ Prior.COVID.infection.,
  data = FC_response2
)
## # A tibble: 1 x 7
##   .y.        group1 group2    n1    n2 statistic      p
## * <chr>      <chr>  <chr>  <int> <int>     <dbl>  <dbl>
## 1 FoldChange No     Yes       20    15        14 0.0714
# Rebuild the AIM working data (everything except the "two Weeks" and
# "2 wks post 2nd dose" time categories), then redraw the CD69+CD200+ CD4 time
# course with recentCOVID switched on.
# prePostTime() is not defined in this section; presumably it comes from the
# sourced plotting-functions file.
subsetData <- subset(mergedData, timeCategory != "two Weeks" & timeCategory != "2 wks post 2nd dose")

prePostTime(
  subsetData,
  xData = "timeCategory", yData = "AIM_CD4.CD69.CD200._FreqParent",
  fillParam = "Prior.COVID.infection.", groupby = "Record.ID",
  title = "CD69+CD200+ CD4", xLabel = " ", yLabel = "CD69+CD200+ (% CD4)",
  # TRUE/FALSE rather than T/F: T and F are ordinary, reassignable variables
  repMeasures = FALSE, exponential = FALSE, newform = TRUE, recentCOVID = TRUE
) +
  scale_y_continuous(trans = "log10", limits = c(0.002, 0.8))
## [1] "block3.1 recentCOVID"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 118 row(s) containing missing values (geom_path).
## Warning: Removed 118 rows containing missing values (geom_point).
## Warning: Removed 5 row(s) containing missing values (geom_path).

# ggsave(filename = "./Images/AIM_CD4_CD69CD200_overTime_recentCOVID.pdf")


# OX40+CD137+ frequency of CD4 T cells over the time categories, filled by
# prior-COVID status and grouped per Record.ID, on a log10 y axis.
# prePostTime() is not defined in this section; presumably it comes from the
# sourced plotting-functions file.
prePostTime(
  subsetData,
  xData = "timeCategory", yData = "AIM_CD4.Ox40.CD137._FreqParent",
  fillParam = "Prior.COVID.infection.", groupby = "Record.ID",
  title = "OX40+CD137+ CD4", xLabel = " ", yLabel = "CD137+OX40+ (% CD4)",
  # TRUE/FALSE rather than T/F: T and F are ordinary, reassignable variables
  repMeasures = FALSE, exponential = FALSE, newform = TRUE
) +
  scale_y_continuous(trans = "log10")
## [1] "block3"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 118 row(s) containing missing values (geom_path).
## Warning: Removed 118 rows containing missing values (geom_point).

# ggsave(filename = "./Images/AIM_CD4_CD137Ox40_overTime.pdf")

# Bartlett's test for equal variances of the OX40+CD137+ CD4 frequency across
# time categories; both prior-infection groups are pooled.
bartlett.test(
  AIM_CD4.Ox40.CD137._FreqParent ~ timeCategory,
  data = subsetData
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  AIM_CD4.Ox40.CD137._FreqParent by timeCategory
## Bartlett's K-squared = 18.986, df = 4, p-value = 0.0007909
# Kruskal-Wallis test of the OX40+CD137+ CD4 frequency across time categories,
# restricted to rows without prior COVID infection.
kruskal_test(
  data = subset(subsetData, Prior.COVID.infection. == "No"),
  formula = AIM_CD4.Ox40.CD137._FreqParent ~ timeCategory
)
## # A tibble: 1 x 6
##   .y.                                n statistic    df      p method        
## * <chr>                          <int>     <dbl> <int>  <dbl> <chr>         
## 1 AIM_CD4.Ox40.CD137._FreqParent   102      12.7     4 0.0131 Kruskal-Wallis
# Pairwise Dunn comparisons between time categories for the OX40+CD137+ CD4
# frequency, rows without prior COVID infection. p.adj uses the function's
# default adjustment (Holm in rstatix).
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == "No"),
  formula = AIM_CD4.Ox40.CD137._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.       group1  group2       n1    n2 statistic       p  p.adj p.adj.signif
##  * <chr>     <chr>   <chr>     <int> <int>     <dbl>   <dbl>  <dbl> <chr>       
##  1 AIM_CD4.~ Baseli~ "Post 1s~     6     6     0.492 0.623   1      ns          
##  2 AIM_CD4.~ Baseli~ "Pre 2nd~     6     6     2.21  0.0268  0.188  ns          
##  3 AIM_CD4.~ Baseli~ "Post 2n~     6     6     2.44  0.0145  0.131  ns          
##  4 AIM_CD4.~ Baseli~ "One mon~     6     6     2.80  0.00504 0.0504 ns          
##  5 AIM_CD4.~ Post 1~ "Pre 2nd~     6     6     1.72  0.0851  0.425  ns          
##  6 AIM_CD4.~ Post 1~ "Post 2n~     6     6     1.95  0.0510  0.306  ns          
##  7 AIM_CD4.~ Post 1~ "One mon~     6     6     2.31  0.0208  0.166  ns          
##  8 AIM_CD4.~ Pre 2n~ "Post 2n~     6     6     0.230 0.818   1      ns          
##  9 AIM_CD4.~ Pre 2n~ "One mon~     6     6     0.590 0.555   1      ns          
## 10 AIM_CD4.~ Post 2~ "One mon~     6     6     0.361 0.718   1      ns
# Kruskal-Wallis test of the OX40+CD137+ CD4 frequency across time categories,
# restricted to rows with prior COVID infection.
kruskal_test(
  data = subset(subsetData, Prior.COVID.infection. == "Yes"),
  formula = AIM_CD4.Ox40.CD137._FreqParent ~ timeCategory
)
## # A tibble: 1 x 6
##   .y.                                n statistic    df     p method        
## * <chr>                          <int>     <dbl> <int> <dbl> <chr>         
## 1 AIM_CD4.Ox40.CD137._FreqParent    74      1.60     4 0.809 Kruskal-Wallis
# Pairwise Dunn comparisons between time categories for the OX40+CD137+ CD4
# frequency, rows with prior COVID infection. p.adj uses the function's default
# adjustment (Holm in rstatix).
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == "Yes"),
  formula = AIM_CD4.Ox40.CD137._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.         group1  group2        n1    n2 statistic     p p.adj p.adj.signif
##  * <chr>       <chr>   <chr>      <int> <int>     <dbl> <dbl> <dbl> <chr>       
##  1 AIM_CD4.Ox~ Baseli~ "Post 1st~     4     6    0.377  0.706     1 ns          
##  2 AIM_CD4.Ox~ Baseli~ "Pre 2nd ~     4     6    0.0314 0.975     1 ns          
##  3 AIM_CD4.Ox~ Baseli~ "Post 2nd~     4     6   -0.502  0.615     1 ns          
##  4 AIM_CD4.Ox~ Baseli~ "One mont~     4     6    0.534  0.594     1 ns          
##  5 AIM_CD4.Ox~ Post 1~ "Pre 2nd ~     6     6   -0.386  0.699     1 ns          
##  6 AIM_CD4.Ox~ Post 1~ "Post 2nd~     6     6   -0.983  0.326     1 ns          
##  7 AIM_CD4.Ox~ Post 1~ "One mont~     6     6    0.176  0.861     1 ns          
##  8 AIM_CD4.Ox~ Pre 2n~ "Post 2nd~     6     6   -0.597  0.551     1 ns          
##  9 AIM_CD4.Ox~ Pre 2n~ "One mont~     6     6    0.562  0.574     1 ns          
## 10 AIM_CD4.Ox~ Post 2~ "One mont~     6     6    1.16   0.247     1 ns
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "AIM_CD4.Ox40.CD137._FreqParent")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs2L.csv")

# CD8 analyses ----

# CD137+IFNg+ frequency of CD8 T cells over the time categories, filled by
# prior-COVID status and grouped per Record.ID, on a log10 y axis.
# prePostTime() is not defined in this section; presumably it comes from the
# sourced plotting-functions file.
prePostTime(
  subsetData,
  xData = "timeCategory", yData = "AIM_CD8.CD137.IFNg._FreqParent",
  fillParam = "Prior.COVID.infection.", groupby = "Record.ID",
  title = "CD137+IFNg+ CD8", xLabel = " ", yLabel = "CD137+IFNg+ (% CD8)",
  # TRUE/FALSE rather than T/F: T and F are ordinary, reassignable variables
  repMeasures = FALSE, exponential = FALSE, newform = TRUE
) +
  scale_y_continuous(trans = "log10", limits = c(0.0003, 0.5))
## [1] "block3"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)
## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 118 row(s) containing missing values (geom_path).
## Warning: Removed 118 rows containing missing values (geom_point).

# ggsave(filename = "./Images/AIM_CD8_CD137IFNg_overTime.pdf")
# Bartlett's test for equal variances of the CD137+IFNg+ CD8 frequency across
# time categories; both prior-infection groups are pooled.
bartlett.test(
  AIM_CD8.CD137.IFNg._FreqParent ~ timeCategory,
  data = subsetData
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  AIM_CD8.CD137.IFNg._FreqParent by timeCategory
## Bartlett's K-squared = 53.51, df = 4, p-value = 6.664e-11
# Kruskal-Wallis test of the CD137+IFNg+ CD8 frequency across time categories,
# restricted to rows without prior COVID infection.
kruskal_test(
  data = subset(subsetData, Prior.COVID.infection. == "No"),
  formula = AIM_CD8.CD137.IFNg._FreqParent ~ timeCategory
)
## # A tibble: 1 x 6
##   .y.                                n statistic    df       p method        
## * <chr>                          <int>     <dbl> <int>   <dbl> <chr>         
## 1 AIM_CD8.CD137.IFNg._FreqParent   102      15.0     4 0.00474 Kruskal-Wallis
# Pairwise Dunn comparisons between time categories for the CD137+IFNg+ CD8
# frequency, rows without prior COVID infection. p.adj uses the function's
# default adjustment (Holm in rstatix).
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == "No"),
  formula = AIM_CD8.CD137.IFNg._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.       group1  group2      n1    n2 statistic       p   p.adj p.adj.signif
##  * <chr>     <chr>   <chr>    <int> <int>     <dbl>   <dbl>   <dbl> <chr>       
##  1 AIM_CD8.~ Baseli~ "Post 1~     6     6    -0.538 5.91e-1 1       ns          
##  2 AIM_CD8.~ Baseli~ "Pre 2n~     6     6     0.639 5.23e-1 1       ns          
##  3 AIM_CD8.~ Baseli~ "Post 2~     6     6     1.75  8.04e-2 0.482   ns          
##  4 AIM_CD8.~ Baseli~ "One mo~     6     6     2.86  4.26e-3 0.0384  *           
##  5 AIM_CD8.~ Post 1~ "Pre 2n~     6     6     1.18  2.39e-1 1       ns          
##  6 AIM_CD8.~ Post 1~ "Post 2~     6     6     2.29  2.22e-2 0.178   ns          
##  7 AIM_CD8.~ Post 1~ "One mo~     6     6     3.40  6.84e-4 0.00684 **          
##  8 AIM_CD8.~ Pre 2n~ "Post 2~     6     6     1.11  2.67e-1 1       ns          
##  9 AIM_CD8.~ Pre 2n~ "One mo~     6     6     2.22  2.65e-2 0.185   ns          
## 10 AIM_CD8.~ Post 2~ "One mo~     6     6     1.11  2.67e-1 1       ns
# Kruskal-Wallis test of the CD137+IFNg+ CD8 frequency across time categories,
# restricted to rows with prior COVID infection.
kruskal_test(
  data = subset(subsetData, Prior.COVID.infection. == "Yes"),
  formula = AIM_CD8.CD137.IFNg._FreqParent ~ timeCategory
)
## # A tibble: 1 x 6
##   .y.                                n statistic    df     p method        
## * <chr>                          <int>     <dbl> <int> <dbl> <chr>         
## 1 AIM_CD8.CD137.IFNg._FreqParent    74      1.15     4 0.886 Kruskal-Wallis
# Pairwise Dunn comparisons between time categories for the CD137+IFNg+ CD8
# frequency, rows with prior COVID infection. p.adj uses the function's default
# adjustment (Holm in rstatix).
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == "Yes"),
  formula = AIM_CD8.CD137.IFNg._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.         group1  group2        n1    n2 statistic     p p.adj p.adj.signif
##  * <chr>       <chr>   <chr>      <int> <int>     <dbl> <dbl> <dbl> <chr>       
##  1 AIM_CD8.CD~ Baseli~ "Post 1st~     4     6   -0.0786 0.937     1 ns          
##  2 AIM_CD8.CD~ Baseli~ "Pre 2nd ~     4     6   -0.707  0.480     1 ns          
##  3 AIM_CD8.CD~ Baseli~ "Post 2nd~     4     6   -0.0786 0.937     1 ns          
##  4 AIM_CD8.CD~ Baseli~ "One mont~     4     6    0.204  0.838     1 ns          
##  5 AIM_CD8.CD~ Post 1~ "Pre 2nd ~     6     6   -0.703  0.482     1 ns          
##  6 AIM_CD8.CD~ Post 1~ "Post 2nd~     6     6    0      1         1 ns          
##  7 AIM_CD8.CD~ Post 1~ "One mont~     6     6    0.316  0.752     1 ns          
##  8 AIM_CD8.CD~ Pre 2n~ "Post 2nd~     6     6    0.703  0.482     1 ns          
##  9 AIM_CD8.CD~ Pre 2n~ "One mont~     6     6    1.02   0.308     1 ns          
## 10 AIM_CD8.CD~ Post 2~ "One mont~     6     6    0.316  0.752     1 ns
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "AIM_CD8.CD137.IFNg._FreqParent")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig2B.csv")



# Fold-change of the CD137+IFNg+ CD8 frequency from "Baseline" to "One month
# post 2nd dose": dcast() spreads the two time categories into columns, one row
# per Record.ID / prior-infection combination.
subsetData <- subset(
  mergedData,
  timeCategory == "Baseline" | timeCategory == "One month post\n2nd dose"
)
FC_response2 <- dcast(
  subsetData,
  Record.ID + Prior.COVID.infection. ~ timeCategory,
  value.var = c("AIM_CD8.CD137.IFNg._FreqParent")
)
FC_response2$FoldChange <- FC_response2[["One month post\n2nd dose"]] / FC_response2[["Baseline"]]
FC_response2$Cohort <- NULL
# Only infinite ratios are dropped here.
# NOTE(review): is.infinite() is FALSE for NA/NaN, so rows with a missing
# fold-change stay in the table - confirm this is intended.
FC_response2 <- FC_response2[!is.infinite(FC_response2$FoldChange), ]
# Summary statistics per prior-infection group.
get_summary_stats(
  group_by(FC_response2, Prior.COVID.infection.),
  type = "common"
)
## # A tibble: 6 x 11
##   Prior.COVID.inf~ variable     n   min    max median    iqr   mean     sd    se
##   <chr>            <chr>    <dbl> <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl> <dbl>
## 1 No               "Baseli~     3 0.002  0.003  0.002  0.001  0.003  0.001 0    
## 2 No               "FoldCh~     3 1.85  24.3    4.97  11.2   10.4   12.1   7.01 
## 3 No               "One mo~     3 0.006  0.058  0.009  0.026  0.025  0.029 0.017
## 4 Yes              "Baseli~     4 0.002  0.028  0.011  0.009  0.013  0.011 0.005
## 5 Yes              "FoldCh~     3 2.46   7.14   5.80   2.34   5.13   2.41  1.39 
## 6 Yes              "One mo~     6 0      0.2    0.017  0.016  0.044  0.077 0.031
## # ... with 1 more variable: ci <dbl>
# Wilcoxon rank-sum comparison of the fold-change between prior-infection groups.
# NOTE(review): the n1/n2 printed for this call are larger than the FoldChange
# counts in the preceding summary, presumably because rows with a missing
# fold-change are still in FC_response2 - confirm before reporting group sizes.
wilcox_test(
  formula = FoldChange ~ Prior.COVID.infection.,
  data = FC_response2
)
## # A tibble: 1 x 7
##   .y.        group1 group2    n1    n2 statistic     p
## * <chr>      <chr>  <chr>  <int> <int>     <dbl> <dbl>
## 1 FoldChange No     Yes       18    15         4     1
# Rebuild the AIM working data (everything except the "two Weeks" and
# "2 wks post 2nd dose" time categories), then redraw the CD137+IFNg+ CD8 time
# course with recentCOVID switched on.
# prePostTime() is not defined in this section; presumably it comes from the
# sourced plotting-functions file.
subsetData <- subset(mergedData, timeCategory != "two Weeks" & timeCategory != "2 wks post 2nd dose")

prePostTime(
  subsetData,
  xData = "timeCategory", yData = "AIM_CD8.CD137.IFNg._FreqParent",
  fillParam = "Prior.COVID.infection.", groupby = "Record.ID",
  title = "CD137+IFNg+ CD8", xLabel = " ", yLabel = "CD137+IFNg+ (% CD8)",
  # TRUE/FALSE rather than T/F: T and F are ordinary, reassignable variables
  repMeasures = FALSE, exponential = FALSE, newform = TRUE, recentCOVID = TRUE
) +
  scale_y_continuous(trans = "log10", limits = c(0.0003, 0.5))
## [1] "block3.1 recentCOVID"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 118 row(s) containing missing values (geom_path).
## Warning: Removed 118 rows containing missing values (geom_point).
## Warning: Removed 5 row(s) containing missing values (geom_path).

# ggsave(filename = "./Images/AIM_CD8_CD137IFNg_overTime_recentCOVID.pdf")

# OX40+CD137+ frequency of CD8 T cells over the time categories, filled by
# prior-COVID status and grouped per Record.ID, on a log10 y axis.
# prePostTime() is not defined in this section; presumably it comes from the
# sourced plotting-functions file.
prePostTime(
  subsetData,
  xData = "timeCategory", yData = "AIM_CD8.Ox40.CD137._FreqParent",
  fillParam = "Prior.COVID.infection.", groupby = "Record.ID",
  title = "OX40+CD137+ CD8", xLabel = " ", yLabel = "CD137+OX40+  (% CD8)",
  # TRUE/FALSE rather than T/F: T and F are ordinary, reassignable variables
  repMeasures = FALSE, exponential = FALSE, newform = TRUE
) +
  scale_y_continuous(trans = "log10")
## [1] "block3"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)
## Warning: Removed 118 row(s) containing missing values (geom_path).
## Warning: Removed 118 rows containing missing values (geom_point).

# ggsave(filename = "./Images/AIM_CD8_CD137Ox40_overTime.pdf")
# Bartlett's test for equal variances of the OX40+CD137+ CD8 frequency across
# time categories; both prior-infection groups are pooled.
bartlett.test(
  AIM_CD8.Ox40.CD137._FreqParent ~ timeCategory,
  data = subsetData
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  AIM_CD8.Ox40.CD137._FreqParent by timeCategory
## Bartlett's K-squared = 33.349, df = 4, p-value = 1.013e-06
# Kruskal-Wallis test of the OX40+CD137+ CD8 frequency across time categories,
# restricted to rows without prior COVID infection.
kruskal_test(
  data = subset(subsetData, Prior.COVID.infection. == "No"),
  formula = AIM_CD8.Ox40.CD137._FreqParent ~ timeCategory
)
## # A tibble: 1 x 6
##   .y.                                n statistic    df      p method        
## * <chr>                          <int>     <dbl> <int>  <dbl> <chr>         
## 1 AIM_CD8.Ox40.CD137._FreqParent   102      12.9     4 0.0115 Kruskal-Wallis
# Pairwise Dunn comparisons between time categories for the OX40+CD137+ CD8
# frequency, rows without prior COVID infection. p.adj uses the function's
# default adjustment (Holm in rstatix).
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == "No"),
  formula = AIM_CD8.Ox40.CD137._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.       group1  group2       n1    n2 statistic       p  p.adj p.adj.signif
##  * <chr>     <chr>   <chr>     <int> <int>     <dbl>   <dbl>  <dbl> <chr>       
##  1 AIM_CD8.~ Baseli~ "Post 1s~     6     6    -1.49  0.136   0.813  ns          
##  2 AIM_CD8.~ Baseli~ "Pre 2nd~     6     6     0     1       1      ns          
##  3 AIM_CD8.~ Baseli~ "Post 2n~     6     6     1.33  0.184   0.813  ns          
##  4 AIM_CD8.~ Baseli~ "One mon~     6     6     1.72  0.0850  0.680  ns          
##  5 AIM_CD8.~ Post 1~ "Pre 2nd~     6     6     1.49  0.136   0.813  ns          
##  6 AIM_CD8.~ Post 1~ "Post 2n~     6     6     2.82  0.00479 0.0431 *           
##  7 AIM_CD8.~ Post 1~ "One mon~     6     6     3.21  0.00131 0.0131 *           
##  8 AIM_CD8.~ Pre 2n~ "Post 2n~     6     6     1.33  0.184   0.813  ns          
##  9 AIM_CD8.~ Pre 2n~ "One mon~     6     6     1.72  0.0850  0.680  ns          
## 10 AIM_CD8.~ Post 2~ "One mon~     6     6     0.394 0.694   1      ns
# Kruskal-Wallis test of the OX40+CD137+ CD8 frequency across time categories,
# restricted to rows with prior COVID infection.
kruskal_test(
  data = subset(subsetData, Prior.COVID.infection. == "Yes"),
  formula = AIM_CD8.Ox40.CD137._FreqParent ~ timeCategory
)
## # A tibble: 1 x 6
##   .y.                                n statistic    df     p method        
## * <chr>                          <int>     <dbl> <int> <dbl> <chr>         
## 1 AIM_CD8.Ox40.CD137._FreqParent    74      3.19     4 0.526 Kruskal-Wallis
# Pairwise Dunn comparisons between time categories for the OX40+CD137+ CD8
# frequency, rows with prior COVID infection. p.adj uses the function's default
# adjustment (Holm in rstatix).
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == "Yes"),
  formula = AIM_CD8.Ox40.CD137._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.        group1   group2       n1    n2 statistic      p p.adj p.adj.signif
##  * <chr>      <chr>    <chr>     <int> <int>     <dbl>  <dbl> <dbl> <chr>       
##  1 AIM_CD8.O~ Baseline "Post 1s~     4     6    -1.28  0.200  1     ns          
##  2 AIM_CD8.O~ Baseline "Pre 2nd~     4     6    -1.09  0.275  1     ns          
##  3 AIM_CD8.O~ Baseline "Post 2n~     4     6    -1.72  0.0853 0.853 ns          
##  4 AIM_CD8.O~ Baseline "One mon~     4     6    -0.856 0.392  1     ns          
##  5 AIM_CD8.O~ Post 1s~ "Pre 2nd~     6     6     0.211 0.833  1     ns          
##  6 AIM_CD8.O~ Post 1s~ "Post 2n~     6     6    -0.492 0.623  1     ns          
##  7 AIM_CD8.O~ Post 1s~ "One mon~     6     6     0.474 0.635  1     ns          
##  8 AIM_CD8.O~ Pre 2nd~ "Post 2n~     6     6    -0.703 0.482  1     ns          
##  9 AIM_CD8.O~ Pre 2nd~ "One mon~     6     6     0.264 0.792  1     ns          
## 10 AIM_CD8.O~ Post 2n~ "One mon~     6     6     0.966 0.334  1     ns
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "AIM_CD8.Ox40.CD137._FreqParent")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs2J.csv")



# Plot AIM_CD4.TNF._FreqParent against timeCategory with the project helper
# prePostTime() (not defined in this part of the file; presumably it comes from
# the sourced COVIDvaccines_PlottingFunctions.R), filled by
# Prior.COVID.infection. and grouped by Record.ID.
# Logical flags are spelled TRUE/FALSE because T and F are ordinary variables
# that can be reassigned.
prePostTime(
  subsetData,
  xData = "timeCategory",
  yData = "AIM_CD4.TNF._FreqParent",
  fillParam = "Prior.COVID.infection.",
  groupby = "Record.ID",
  title = "TNF+ CD4",
  xLabel = " ",
  yLabel = "TNF+ (% CD4)",
  repMeasures = FALSE,
  exponential = FALSE,
  newform = TRUE
) +
  # log10 y axis: values of 0 map to -Inf, which ggplot2 reports as
  # "Transformation introduced infinite values".
  scale_y_continuous(trans = "log10")
## [1] "block3"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 118 row(s) containing missing values (geom_path).
## Warning: Removed 118 rows containing missing values (geom_point).

# ggsave(filename = "./Images/AIM_CD4_TNF_overTime.pdf")

# Bartlett test for equal variances of AIM_CD4.TNF._FreqParent across time
# points. Uses all rows of subsetData, i.e. both Prior.COVID.infection. groups
# pooled, whereas the rank tests that follow are run per group.
bartlett.test(
  AIM_CD4.TNF._FreqParent ~ timeCategory,
  data = subsetData
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  AIM_CD4.TNF._FreqParent by timeCategory
## Bartlett's K-squared = 34.701, df = 4, p-value = 5.352e-07
# Kruskal-Wallis test of AIM_CD4.TNF._FreqParent across time points, using
# only rows where Prior.COVID.infection. is "No".
kruskal_test(
  data = subset(subsetData, Prior.COVID.infection. == "No"),
  formula = AIM_CD4.TNF._FreqParent ~ timeCategory
)
## # A tibble: 1 x 6
##   .y.                         n statistic    df       p method        
## * <chr>                   <int>     <dbl> <int>   <dbl> <chr>         
## 1 AIM_CD4.TNF._FreqParent   102      15.3     4 0.00415 Kruskal-Wallis
# Pairwise Dunn comparisons of AIM_CD4.TNF._FreqParent between time points,
# using only rows where Prior.COVID.infection. is "No".
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == "No"),
  formula = AIM_CD4.TNF._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.       group1  group2       n1    n2 statistic       p  p.adj p.adj.signif
##  * <chr>     <chr>   <chr>     <int> <int>     <dbl>   <dbl>  <dbl> <chr>       
##  1 AIM_CD4.~ Baseli~ "Post 1s~     6     6     0.541 0.588   1      ns          
##  2 AIM_CD4.~ Baseli~ "Pre 2nd~     6     6     2.18  0.0292  0.204  ns          
##  3 AIM_CD4.~ Baseli~ "Post 2n~     6     6     2.53  0.0116  0.0925 ns          
##  4 AIM_CD4.~ Baseli~ "One mon~     6     6     3.28  0.00104 0.0104 *           
##  5 AIM_CD4.~ Post 1~ "Pre 2nd~     6     6     1.64  0.101   0.505  ns          
##  6 AIM_CD4.~ Post 1~ "Post 2n~     6     6     1.98  0.0472  0.283  ns          
##  7 AIM_CD4.~ Post 1~ "One mon~     6     6     2.74  0.00617 0.0556 ns          
##  8 AIM_CD4.~ Pre 2n~ "Post 2n~     6     6     0.344 0.731   1      ns          
##  9 AIM_CD4.~ Pre 2n~ "One mon~     6     6     1.10  0.272   1      ns          
## 10 AIM_CD4.~ Post 2~ "One mon~     6     6     0.754 0.451   1      ns
# Kruskal-Wallis test of AIM_CD4.TNF._FreqParent across time points, using
# only rows where Prior.COVID.infection. is "Yes".
kruskal_test(
  data = subset(subsetData, Prior.COVID.infection. == "Yes"),
  formula = AIM_CD4.TNF._FreqParent ~ timeCategory
)
## # A tibble: 1 x 6
##   .y.                         n statistic    df     p method        
## * <chr>                   <int>     <dbl> <int> <dbl> <chr>         
## 1 AIM_CD4.TNF._FreqParent    74      2.99     4  0.56 Kruskal-Wallis
# Pairwise Dunn comparisons of AIM_CD4.TNF._FreqParent between time points,
# using only rows where Prior.COVID.infection. is "Yes".
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == "Yes"),
  formula = AIM_CD4.TNF._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.       group1   group2        n1    n2 statistic      p p.adj p.adj.signif
##  * <chr>     <chr>    <chr>      <int> <int>     <dbl>  <dbl> <dbl> <chr>       
##  1 AIM_CD4.~ Baseline "Post 1st~     4     6    0.856  0.392  1     ns          
##  2 AIM_CD4.~ Baseline "Pre 2nd ~     4     6    1.06   0.289  1     ns          
##  3 AIM_CD4.~ Baseline "Post 2nd~     4     6    0.887  0.375  1     ns          
##  4 AIM_CD4.~ Baseline "One mont~     4     6    1.70   0.0883 0.883 ns          
##  5 AIM_CD4.~ Post 1s~ "Pre 2nd ~     6     6    0.228  0.819  1     ns          
##  6 AIM_CD4.~ Post 1s~ "Post 2nd~     6     6    0.0351 0.972  1     ns          
##  7 AIM_CD4.~ Post 1s~ "One mont~     6     6    0.948  0.343  1     ns          
##  8 AIM_CD4.~ Pre 2nd~ "Post 2nd~     6     6   -0.193  0.847  1     ns          
##  9 AIM_CD4.~ Pre 2nd~ "One mont~     6     6    0.720  0.472  1     ns          
## 10 AIM_CD4.~ Post 2n~ "One mont~     6     6    0.913  0.361  1     ns
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "AIM_CD4.TNF._FreqParent")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs2F.csv")


# Plot AIM_CD8.TNF._FreqParent against timeCategory with the project helper
# prePostTime() (not defined in this part of the file; presumably it comes from
# the sourced COVIDvaccines_PlottingFunctions.R), filled by
# Prior.COVID.infection. and grouped by Record.ID.
# Logical flags are spelled TRUE/FALSE because T and F are ordinary variables
# that can be reassigned.
prePostTime(
  subsetData,
  xData = "timeCategory",
  yData = "AIM_CD8.TNF._FreqParent",
  fillParam = "Prior.COVID.infection.",
  groupby = "Record.ID",
  title = "TNF+ CD8",
  xLabel = " ",
  yLabel = "TNF+ (% CD8)",
  repMeasures = FALSE,
  exponential = FALSE,
  newform = TRUE
) +
  # log10 y axis: values of 0 map to -Inf, which ggplot2 reports as
  # "Transformation introduced infinite values".
  scale_y_continuous(trans = "log10")
## [1] "block3"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 118 row(s) containing missing values (geom_path).
## Warning: Removed 118 rows containing missing values (geom_point).

# ggsave(filename = "./Images/AIM_CD8_TNF_overTime.pdf")

# Bartlett test for equal variances of AIM_CD8.TNF._FreqParent across time
# points. Uses all rows of subsetData, i.e. both Prior.COVID.infection. groups
# pooled, whereas the rank tests that follow are run per group.
bartlett.test(
  AIM_CD8.TNF._FreqParent ~ timeCategory,
  data = subsetData
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  AIM_CD8.TNF._FreqParent by timeCategory
## Bartlett's K-squared = 28.845, df = 4, p-value = 8.407e-06
# Kruskal-Wallis test of AIM_CD8.TNF._FreqParent across time points, using
# only rows where Prior.COVID.infection. is "No".
kruskal_test(
  data = subset(subsetData, Prior.COVID.infection. == "No"),
  formula = AIM_CD8.TNF._FreqParent ~ timeCategory
)
## # A tibble: 1 x 6
##   .y.                         n statistic    df      p method        
## * <chr>                   <int>     <dbl> <int>  <dbl> <chr>         
## 1 AIM_CD8.TNF._FreqParent   102      12.9     4 0.0116 Kruskal-Wallis
# Pairwise Dunn comparisons of AIM_CD8.TNF._FreqParent between time points,
# using only rows where Prior.COVID.infection. is "No".
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == "No"),
  formula = AIM_CD8.TNF._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.       group1  group2       n1    n2 statistic       p  p.adj p.adj.signif
##  * <chr>     <chr>   <chr>     <int> <int>     <dbl>   <dbl>  <dbl> <chr>       
##  1 AIM_CD8.~ Baseli~ "Post 1s~     6     6     0.197 0.844   1      ns          
##  2 AIM_CD8.~ Baseli~ "Pre 2nd~     6     6     0.953 0.341   1      ns          
##  3 AIM_CD8.~ Baseli~ "Post 2n~     6     6     2.14  0.0326  0.261  ns          
##  4 AIM_CD8.~ Baseli~ "One mon~     6     6     2.96  0.00310 0.0310 *           
##  5 AIM_CD8.~ Post 1~ "Pre 2nd~     6     6     0.756 0.450   1      ns          
##  6 AIM_CD8.~ Post 1~ "Post 2n~     6     6     1.94  0.0525  0.315  ns          
##  7 AIM_CD8.~ Post 1~ "One mon~     6     6     2.76  0.00576 0.0519 ns          
##  8 AIM_CD8.~ Pre 2n~ "Post 2n~     6     6     1.18  0.237   1      ns          
##  9 AIM_CD8.~ Pre 2n~ "One mon~     6     6     2.00  0.0450  0.315  ns          
## 10 AIM_CD8.~ Post 2~ "One mon~     6     6     0.822 0.411   1      ns
# Kruskal-Wallis test of AIM_CD8.TNF._FreqParent across time points, using
# only rows where Prior.COVID.infection. is "Yes".
kruskal_test(
  data = subset(subsetData, Prior.COVID.infection. == "Yes"),
  formula = AIM_CD8.TNF._FreqParent ~ timeCategory
)
## # A tibble: 1 x 6
##   .y.                         n statistic    df     p method        
## * <chr>                   <int>     <dbl> <int> <dbl> <chr>         
## 1 AIM_CD8.TNF._FreqParent    74      1.94     4 0.747 Kruskal-Wallis
# Pairwise Dunn comparisons of AIM_CD8.TNF._FreqParent between time points,
# using only rows where Prior.COVID.infection. is "Yes".
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == "Yes"),
  formula = AIM_CD8.TNF._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.        group1   group2        n1    n2 statistic     p p.adj p.adj.signif
##  * <chr>      <chr>    <chr>      <int> <int>     <dbl> <dbl> <dbl> <chr>       
##  1 AIM_CD8.T~ Baseline "Post 1st~     4     6   -0.440  0.660     1 ns          
##  2 AIM_CD8.T~ Baseline "Pre 2nd ~     4     6   -0.518  0.604     1 ns          
##  3 AIM_CD8.T~ Baseline "Post 2nd~     4     6   -0.738  0.460     1 ns          
##  4 AIM_CD8.T~ Baseline "One mont~     4     6    0.377  0.706     1 ns          
##  5 AIM_CD8.T~ Post 1s~ "Pre 2nd ~     6     6   -0.0878 0.930     1 ns          
##  6 AIM_CD8.T~ Post 1s~ "Post 2nd~     6     6   -0.334  0.739     1 ns          
##  7 AIM_CD8.T~ Post 1s~ "One mont~     6     6    0.913  0.361     1 ns          
##  8 AIM_CD8.T~ Pre 2nd~ "Post 2nd~     6     6   -0.246  0.806     1 ns          
##  9 AIM_CD8.T~ Pre 2nd~ "One mont~     6     6    1.00   0.317     1 ns          
## 10 AIM_CD8.T~ Post 2n~ "One mont~     6     6    1.25   0.213     1 ns
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "AIM_CD8.TNF._FreqParent")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs2E.csv")


# Plot AIM_CD4.IFNg._FreqParent against timeCategory with the project helper
# prePostTime() (not defined in this part of the file; presumably it comes from
# the sourced COVIDvaccines_PlottingFunctions.R), filled by
# Prior.COVID.infection. and grouped by Record.ID.
# Logical flags are spelled TRUE/FALSE because T and F are ordinary variables
# that can be reassigned.
prePostTime(
  subsetData,
  xData = "timeCategory",
  yData = "AIM_CD4.IFNg._FreqParent",
  fillParam = "Prior.COVID.infection.",
  groupby = "Record.ID",
  title = "IFNg+ CD4",
  xLabel = " ",
  yLabel = "IFNg+ (% CD4)",
  repMeasures = FALSE,
  exponential = FALSE,
  newform = TRUE
) +
  # log10 y axis: values of 0 map to -Inf, which ggplot2 reports as
  # "Transformation introduced infinite values".
  scale_y_continuous(trans = "log10")
## [1] "block3"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 118 row(s) containing missing values (geom_path).
## Warning: Removed 118 rows containing missing values (geom_point).

# ggsave(filename = "./Images/AIM_CD4_IFNg_overTime.pdf")

# Bartlett test for equal variances of AIM_CD4.IFNg._FreqParent across time
# points. Uses all rows of subsetData, i.e. both Prior.COVID.infection. groups
# pooled, whereas the rank tests that follow are run per group.
bartlett.test(
  AIM_CD4.IFNg._FreqParent ~ timeCategory,
  data = subsetData
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  AIM_CD4.IFNg._FreqParent by timeCategory
## Bartlett's K-squared = 22.681, df = 4, p-value = 0.0001466
# Kruskal-Wallis test of AIM_CD4.IFNg._FreqParent across time points, using
# only rows where Prior.COVID.infection. is "No".
kruskal_test(
  data = subset(subsetData, Prior.COVID.infection. == "No"),
  formula = AIM_CD4.IFNg._FreqParent ~ timeCategory
)
## # A tibble: 1 x 6
##   .y.                          n statistic    df      p method        
## * <chr>                    <int>     <dbl> <int>  <dbl> <chr>         
## 1 AIM_CD4.IFNg._FreqParent   102      10.9     4 0.0272 Kruskal-Wallis
# Pairwise Dunn comparisons of AIM_CD4.IFNg._FreqParent between time points,
# using only rows where Prior.COVID.infection. is "No".
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == "No"),
  formula = AIM_CD4.IFNg._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.        group1  group2        n1    n2 statistic      p p.adj p.adj.signif
##  * <chr>      <chr>   <chr>      <int> <int>     <dbl>  <dbl> <dbl> <chr>       
##  1 AIM_CD4.I~ Baseli~ "Post 1st~     6     6    0.0328 0.974  1     ns          
##  2 AIM_CD4.I~ Baseli~ "Pre 2nd ~     6     6    1.43   0.154  0.922 ns          
##  3 AIM_CD4.I~ Baseli~ "Post 2nd~     6     6    2.31   0.0208 0.174 ns          
##  4 AIM_CD4.I~ Baseli~ "One mont~     6     6    2.38   0.0174 0.174 ns          
##  5 AIM_CD4.I~ Post 1~ "Pre 2nd ~     6     6    1.39   0.163  0.922 ns          
##  6 AIM_CD4.I~ Post 1~ "Post 2nd~     6     6    2.28   0.0226 0.174 ns          
##  7 AIM_CD4.I~ Post 1~ "One mont~     6     6    2.35   0.0190 0.174 ns          
##  8 AIM_CD4.I~ Pre 2n~ "Post 2nd~     6     6    0.886  0.376  1     ns          
##  9 AIM_CD4.I~ Pre 2n~ "One mont~     6     6    0.951  0.342  1     ns          
## 10 AIM_CD4.I~ Post 2~ "One mont~     6     6    0.0656 0.948  1     ns
# Kruskal-Wallis test of AIM_CD4.IFNg._FreqParent across time points, using
# only rows where Prior.COVID.infection. is "Yes".
kruskal_test(
  data = subset(subsetData, Prior.COVID.infection. == "Yes"),
  formula = AIM_CD4.IFNg._FreqParent ~ timeCategory
)
## # A tibble: 1 x 6
##   .y.                          n statistic    df     p method        
## * <chr>                    <int>     <dbl> <int> <dbl> <chr>         
## 1 AIM_CD4.IFNg._FreqParent    74      1.71     4 0.789 Kruskal-Wallis
# Pairwise Dunn comparisons of AIM_CD4.IFNg._FreqParent between time points,
# using only rows where Prior.COVID.infection. is "Yes".
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == "Yes"),
  formula = AIM_CD4.IFNg._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.        group1   group2        n1    n2 statistic     p p.adj p.adj.signif
##  * <chr>      <chr>    <chr>      <int> <int>     <dbl> <dbl> <dbl> <chr>       
##  1 AIM_CD4.I~ Baseline "Post 1st~     4     6    1.21   0.226     1 ns          
##  2 AIM_CD4.I~ Baseline "Pre 2nd ~     4     6    1.05   0.293     1 ns          
##  3 AIM_CD4.I~ Baseline "Post 2nd~     4     6    0.723  0.470     1 ns          
##  4 AIM_CD4.I~ Baseline "One mont~     4     6    0.974  0.330     1 ns          
##  5 AIM_CD4.I~ Post 1s~ "Pre 2nd ~     6     6   -0.176  0.861     1 ns          
##  6 AIM_CD4.I~ Post 1s~ "Post 2nd~     6     6   -0.544  0.586     1 ns          
##  7 AIM_CD4.I~ Post 1s~ "One mont~     6     6   -0.263  0.792     1 ns          
##  8 AIM_CD4.I~ Pre 2nd~ "Post 2nd~     6     6   -0.369  0.712     1 ns          
##  9 AIM_CD4.I~ Pre 2nd~ "One mont~     6     6   -0.0878 0.930     1 ns          
## 10 AIM_CD4.I~ Post 2n~ "One mont~     6     6    0.281  0.779     1 ns
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "AIM_CD4.IFNg._FreqParent")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs2C.csv")


# Plot AIM_CD8.IFNg._FreqParent against timeCategory with the project helper
# prePostTime() (not defined in this part of the file; presumably it comes from
# the sourced COVIDvaccines_PlottingFunctions.R), filled by
# Prior.COVID.infection. and grouped by Record.ID.
# Logical flags are spelled TRUE/FALSE because T and F are ordinary variables
# that can be reassigned.
prePostTime(
  subsetData,
  xData = "timeCategory",
  yData = "AIM_CD8.IFNg._FreqParent",
  fillParam = "Prior.COVID.infection.",
  groupby = "Record.ID",
  title = "IFNg+ CD8",
  xLabel = " ",
  yLabel = "IFNg+ (% CD8)",
  repMeasures = FALSE,
  exponential = FALSE,
  newform = TRUE
) +
  # log10 y axis: values of 0 map to -Inf, which ggplot2 reports as
  # "Transformation introduced infinite values".
  scale_y_continuous(trans = "log10")
## [1] "block3"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 118 row(s) containing missing values (geom_path).
## Warning: Removed 118 rows containing missing values (geom_point).

# ggsave(filename = "./Images/AIM_CD8_IFNg_overTime.pdf")

# Bartlett test for equal variances of AIM_CD8.IFNg._FreqParent across time
# points. Uses all rows of subsetData, i.e. both Prior.COVID.infection. groups
# pooled, whereas the rank tests that follow are run per group.
bartlett.test(
  AIM_CD8.IFNg._FreqParent ~ timeCategory,
  data = subsetData
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  AIM_CD8.IFNg._FreqParent by timeCategory
## Bartlett's K-squared = 39.19, df = 4, p-value = 6.366e-08
# Kruskal-Wallis test of AIM_CD8.IFNg._FreqParent across time points, using
# only rows where Prior.COVID.infection. is "No".
kruskal_test(
  data = subset(subsetData, Prior.COVID.infection. == "No"),
  formula = AIM_CD8.IFNg._FreqParent ~ timeCategory
)
## # A tibble: 1 x 6
##   .y.                          n statistic    df      p method        
## * <chr>                    <int>     <dbl> <int>  <dbl> <chr>         
## 1 AIM_CD8.IFNg._FreqParent   102      9.47     4 0.0503 Kruskal-Wallis
# Pairwise Dunn comparisons of AIM_CD8.IFNg._FreqParent between time points,
# using only rows where Prior.COVID.infection. is "No".
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == "No"),
  formula = AIM_CD8.IFNg._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.       group1  group2       n1    n2 statistic       p  p.adj p.adj.signif
##  * <chr>     <chr>   <chr>     <int> <int>     <dbl>   <dbl>  <dbl> <chr>       
##  1 AIM_CD8.~ Baseli~ "Post 1s~     6     6    -1.02  0.309   1      ns          
##  2 AIM_CD8.~ Baseli~ "Pre 2nd~     6     6     0.361 0.718   1      ns          
##  3 AIM_CD8.~ Baseli~ "Post 2n~     6     6     1.12  0.264   1      ns          
##  4 AIM_CD8.~ Baseli~ "One mon~     6     6     1.84  0.0660  0.528  ns          
##  5 AIM_CD8.~ Post 1~ "Pre 2nd~     6     6     1.38  0.168   1      ns          
##  6 AIM_CD8.~ Post 1~ "Post 2n~     6     6     2.13  0.0328  0.296  ns          
##  7 AIM_CD8.~ Post 1~ "One mon~     6     6     2.86  0.00429 0.0429 *           
##  8 AIM_CD8.~ Pre 2n~ "Post 2n~     6     6     0.755 0.450   1      ns          
##  9 AIM_CD8.~ Pre 2n~ "One mon~     6     6     1.48  0.140   0.977  ns          
## 10 AIM_CD8.~ Post 2~ "One mon~     6     6     0.722 0.470   1      ns
# Kruskal-Wallis test of AIM_CD8.IFNg._FreqParent across time points, using
# only rows where Prior.COVID.infection. is "Yes".
kruskal_test(
  data = subset(subsetData, Prior.COVID.infection. == "Yes"),
  formula = AIM_CD8.IFNg._FreqParent ~ timeCategory
)
## # A tibble: 1 x 6
##   .y.                          n statistic    df     p method        
## * <chr>                    <int>     <dbl> <int> <dbl> <chr>         
## 1 AIM_CD8.IFNg._FreqParent    74      2.61     4 0.626 Kruskal-Wallis
# Pairwise Dunn comparisons of AIM_CD8.IFNg._FreqParent between time points,
# using only rows where Prior.COVID.infection. is "Yes".
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == "Yes"),
  formula = AIM_CD8.IFNg._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.        group1   group2        n1    n2 statistic     p p.adj p.adj.signif
##  * <chr>      <chr>    <chr>      <int> <int>     <dbl> <dbl> <dbl> <chr>       
##  1 AIM_CD8.I~ Baseline "Post 1st~     4     6    0.628  0.530     1 ns          
##  2 AIM_CD8.I~ Baseline "Pre 2nd ~     4     6   -0.314  0.753     1 ns          
##  3 AIM_CD8.I~ Baseline "Post 2nd~     4     6    0.801  0.423     1 ns          
##  4 AIM_CD8.I~ Baseline "One mont~     4     6    0.864  0.388     1 ns          
##  5 AIM_CD8.I~ Post 1s~ "Pre 2nd ~     6     6   -1.05   0.292     1 ns          
##  6 AIM_CD8.I~ Post 1s~ "Post 2nd~     6     6    0.193  0.847     1 ns          
##  7 AIM_CD8.I~ Post 1s~ "One mont~     6     6    0.263  0.792     1 ns          
##  8 AIM_CD8.I~ Pre 2nd~ "Post 2nd~     6     6    1.25   0.212     1 ns          
##  9 AIM_CD8.I~ Pre 2nd~ "One mont~     6     6    1.32   0.188     1 ns          
## 10 AIM_CD8.I~ Post 2n~ "One mont~     6     6    0.0702 0.944     1 ns
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "AIM_CD8.IFNg._FreqParent")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs2B.csv")

# Fold change of AIM_CD8.IFNg._FreqParent: "One month post\n2nd dose" divided
# by "Baseline", summarised per Prior.COVID.infection. group.

# Keep only the two time points that enter the ratio.
subsetData <- subset(
  mergedData,
  timeCategory %in% c("Baseline", "One month post\n2nd dose")
)

# Reshape to wide: one row per Record.ID + Prior.COVID.infection. combination
# and one column per remaining timeCategory value.
FC_response2 <- dcast(
  subsetData,
  Record.ID + Prior.COVID.infection. ~ timeCategory,
  value.var = "AIM_CD8.IFNg._FreqParent"
)

# The ratio is NA when either time point is missing for a row.
# NOTE(review): a Baseline of exactly 0 would give Inf/NaN here - confirm none
# are present in the data.
FC_response2$FC_AIMCD8_IFNg <-
  FC_response2[["One month post\n2nd dose"]] / FC_response2[["Baseline"]]

# The former `FC_response2$Cohort <- NULL` was removed: the dcast() formula
# above never produces a Cohort column, so that assignment had no effect.
FC_response2 %>%
  group_by(Prior.COVID.infection.) %>%
  get_summary_stats(type = "common")
## # A tibble: 6 x 11
##   Prior.COVID.inf~ variable     n   min    max median   iqr  mean    sd    se
##   <chr>            <chr>    <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 No               "Baseli~     6 0.001  0.009  0.002 0.001 0.003 0.003 0.001
## 2 No               "FC_AIM~     6 1.18  14.1    5.34  6.64  6.52  4.98  2.03 
## 3 No               "One mo~     6 0.003  0.13   0.008 0.024 0.032 0.05  0.02 
## 4 Yes              "Baseli~     4 0.001  0.039  0.009 0.012 0.014 0.017 0.008
## 5 Yes              "FC_AIM~     3 2.46  17.4    4.10  7.45  7.98  8.17  4.72 
## 6 Yes              "One mo~     6 0.006  0.16   0.015 0.01  0.037 0.061 0.025
## # ... with 1 more variable: ci <dbl>
# Rebuild the working set: every row of mergedData except those at the
# "two Weeks" or "2 wks post 2nd dose" time points.
subsetData <- subset(
  mergedData,
  !(timeCategory == "two Weeks" | timeCategory == "2 wks post 2nd dose")
)


# Pairwise Dunn comparisons of AIM_CD4.CD69.CD137._FreqParent between time
# points, using only rows where Prior.COVID.infection. is "No".
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == "No"),
  formula = AIM_CD4.CD69.CD137._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.       group1  group2      n1    n2 statistic       p   p.adj p.adj.signif
##  * <chr>     <chr>   <chr>    <int> <int>     <dbl>   <dbl>   <dbl> <chr>       
##  1 AIM_CD4.~ Baseli~ "Post 1~     6     6     0.951 3.41e-1 1       ns          
##  2 AIM_CD4.~ Baseli~ "Pre 2n~     6     6     2.49  1.27e-2 0.101   ns          
##  3 AIM_CD4.~ Baseli~ "Post 2~     6     6     2.87  4.10e-3 0.0369  *           
##  4 AIM_CD4.~ Baseli~ "One mo~     6     6     3.36  7.73e-4 0.00773 **          
##  5 AIM_CD4.~ Post 1~ "Pre 2n~     6     6     1.54  1.23e-1 0.616   ns          
##  6 AIM_CD4.~ Post 1~ "Post 2~     6     6     1.92  5.50e-2 0.330   ns          
##  7 AIM_CD4.~ Post 1~ "One mo~     6     6     2.41  1.59e-2 0.111   ns          
##  8 AIM_CD4.~ Pre 2n~ "Post 2~     6     6     0.377 7.06e-1 1       ns          
##  9 AIM_CD4.~ Pre 2n~ "One mo~     6     6     0.869 3.85e-1 1       ns          
## 10 AIM_CD4.~ Post 2~ "One mo~     6     6     0.492 6.23e-1 1       ns
# Pairwise Dunn comparisons of AIM_CD4.CD71.CD137._FreqParent between time
# points, using only rows where Prior.COVID.infection. is "No".
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == "No"),
  formula = AIM_CD4.CD71.CD137._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.       group1  group2       n1    n2 statistic       p  p.adj p.adj.signif
##  * <chr>     <chr>   <chr>     <int> <int>     <dbl>   <dbl>  <dbl> <chr>       
##  1 AIM_CD4.~ Baseli~ "Post 1s~     6     6     0.854 0.393   1      ns          
##  2 AIM_CD4.~ Baseli~ "Pre 2nd~     6     6     2.36  0.0181  0.145  ns          
##  3 AIM_CD4.~ Baseli~ "Post 2n~     6     6     2.89  0.00386 0.0347 *           
##  4 AIM_CD4.~ Baseli~ "One mon~     6     6     3.17  0.00153 0.0153 *           
##  5 AIM_CD4.~ Post 1~ "Pre 2nd~     6     6     1.51  0.131   0.655  ns          
##  6 AIM_CD4.~ Post 1~ "Post 2n~     6     6     2.04  0.0418  0.251  ns          
##  7 AIM_CD4.~ Post 1~ "One mon~     6     6     2.31  0.0206  0.145  ns          
##  8 AIM_CD4.~ Pre 2n~ "Post 2n~     6     6     0.525 0.599   1      ns          
##  9 AIM_CD4.~ Pre 2n~ "One mon~     6     6     0.804 0.421   1      ns          
## 10 AIM_CD4.~ Post 2~ "One mon~     6     6     0.279 0.780   1      ns
# Pairwise Dunn comparisons of AIM_CD4.CD40L._FreqParent between time points,
# using only rows where Prior.COVID.infection. is "No".
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == "No"),
  formula = AIM_CD4.CD40L._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.        group1  group2        n1    n2 statistic      p p.adj p.adj.signif
##  * <chr>      <chr>   <chr>      <int> <int>     <dbl>  <dbl> <dbl> <chr>       
##  1 AIM_CD4.C~ Baseli~ "Post 1st~     6     6     1.77  0.0766 0.766 ns          
##  2 AIM_CD4.C~ Baseli~ "Pre 2nd ~     6     6     1.00  0.317  1     ns          
##  3 AIM_CD4.C~ Baseli~ "Post 2nd~     6     6     1.16  0.244  1     ns          
##  4 AIM_CD4.C~ Baseli~ "One mont~     6     6     0.246 0.806  1     ns          
##  5 AIM_CD4.C~ Post 1~ "Pre 2nd ~     6     6    -0.771 0.441  1     ns          
##  6 AIM_CD4.C~ Post 1~ "Post 2nd~     6     6    -0.607 0.544  1     ns          
##  7 AIM_CD4.C~ Post 1~ "One mont~     6     6    -1.52  0.127  1     ns          
##  8 AIM_CD4.C~ Pre 2n~ "Post 2nd~     6     6     0.164 0.870  1     ns          
##  9 AIM_CD4.C~ Pre 2n~ "One mont~     6     6    -0.754 0.451  1     ns          
## 10 AIM_CD4.C~ Post 2~ "One mont~     6     6    -0.918 0.358  1     ns
# Pairwise Dunn comparisons of AIM_CD8.CD40L._FreqParent between time points,
# using only rows where Prior.COVID.infection. is "No".
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == "No"),
  formula = AIM_CD8.CD40L._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.        group1  group2        n1    n2 statistic      p p.adj p.adj.signif
##  * <chr>      <chr>   <chr>      <int> <int>     <dbl>  <dbl> <dbl> <chr>       
##  1 AIM_CD8.C~ Baseli~ "Post 1st~     6     6    1.71   0.0880 0.792 ns          
##  2 AIM_CD8.C~ Baseli~ "Pre 2nd ~     6     6    0.361  0.718  1     ns          
##  3 AIM_CD8.C~ Baseli~ "Post 2nd~     6     6    0.443  0.658  1     ns          
##  4 AIM_CD8.C~ Baseli~ "One mont~     6     6   -0.213  0.831  1     ns          
##  5 AIM_CD8.C~ Post 1~ "Pre 2nd ~     6     6   -1.35   0.179  1     ns          
##  6 AIM_CD8.C~ Post 1~ "Post 2nd~     6     6   -1.26   0.207  1     ns          
##  7 AIM_CD8.C~ Post 1~ "One mont~     6     6   -1.92   0.0549 0.549 ns          
##  8 AIM_CD8.C~ Pre 2n~ "Post 2nd~     6     6    0.0820 0.935  1     ns          
##  9 AIM_CD8.C~ Pre 2n~ "One mont~     6     6   -0.574  0.566  1     ns          
## 10 AIM_CD8.C~ Post 2~ "One mont~     6     6   -0.656  0.512  1     ns
# Pairwise Dunn comparisons of AIM_CD8.TNF._FreqParent between time points,
# using only rows where Prior.COVID.infection. is "No".
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == "No"),
  formula = AIM_CD8.TNF._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.       group1  group2       n1    n2 statistic       p  p.adj p.adj.signif
##  * <chr>     <chr>   <chr>     <int> <int>     <dbl>   <dbl>  <dbl> <chr>       
##  1 AIM_CD8.~ Baseli~ "Post 1s~     6     6     0.197 0.844   1      ns          
##  2 AIM_CD8.~ Baseli~ "Pre 2nd~     6     6     0.953 0.341   1      ns          
##  3 AIM_CD8.~ Baseli~ "Post 2n~     6     6     2.14  0.0326  0.261  ns          
##  4 AIM_CD8.~ Baseli~ "One mon~     6     6     2.96  0.00310 0.0310 *           
##  5 AIM_CD8.~ Post 1~ "Pre 2nd~     6     6     0.756 0.450   1      ns          
##  6 AIM_CD8.~ Post 1~ "Post 2n~     6     6     1.94  0.0525  0.315  ns          
##  7 AIM_CD8.~ Post 1~ "One mon~     6     6     2.76  0.00576 0.0519 ns          
##  8 AIM_CD8.~ Pre 2n~ "Post 2n~     6     6     1.18  0.237   1      ns          
##  9 AIM_CD8.~ Pre 2n~ "One mon~     6     6     2.00  0.0450  0.315  ns          
## 10 AIM_CD8.~ Post 2~ "One mon~     6     6     0.822 0.411   1      ns
# NOTE(review): this repeats the immediately preceding Dunn test on
# AIM_CD8.TNF._FreqParent (same formula, same "No" subset) and prints the same
# table. It looks like a copy-paste that was meant to test a different marker;
# confirm which response variable was intended and update the formula.
dunn_test(AIM_CD8.TNF._FreqParent ~ timeCategory, data=subset(subsetData, Prior.COVID.infection. == 'No'))  
## # A tibble: 10 x 9
##    .y.       group1  group2       n1    n2 statistic       p  p.adj p.adj.signif
##  * <chr>     <chr>   <chr>     <int> <int>     <dbl>   <dbl>  <dbl> <chr>       
##  1 AIM_CD8.~ Baseli~ "Post 1s~     6     6     0.197 0.844   1      ns          
##  2 AIM_CD8.~ Baseli~ "Pre 2nd~     6     6     0.953 0.341   1      ns          
##  3 AIM_CD8.~ Baseli~ "Post 2n~     6     6     2.14  0.0326  0.261  ns          
##  4 AIM_CD8.~ Baseli~ "One mon~     6     6     2.96  0.00310 0.0310 *           
##  5 AIM_CD8.~ Post 1~ "Pre 2nd~     6     6     0.756 0.450   1      ns          
##  6 AIM_CD8.~ Post 1~ "Post 2n~     6     6     1.94  0.0525  0.315  ns          
##  7 AIM_CD8.~ Post 1~ "One mon~     6     6     2.76  0.00576 0.0519 ns          
##  8 AIM_CD8.~ Pre 2n~ "Post 2n~     6     6     1.18  0.237   1      ns          
##  9 AIM_CD8.~ Pre 2n~ "One mon~     6     6     2.00  0.0450  0.315  ns          
## 10 AIM_CD8.~ Post 2~ "One mon~     6     6     0.822 0.411   1      ns

—————— Tfh analyses ——————

# Working set for the Tfh analyses: every row of mergedData except those at the
# "two Weeks" or "2 wks post 2nd dose" time points.
subsetData <- subset(
  mergedData,
  !(timeCategory == "two Weeks" | timeCategory == "2 wks post 2nd dose")
)
# Re-create timeCategory as a factor with an explicit level order; values not
# listed in `levels` would become NA.
subsetData[["timeCategory"]] <- factor(
  subsetData[["timeCategory"]],
  levels = c(
    "Baseline",
    "Post 1st dose",
    "Pre 2nd dose",
    "Post 2nd dose",
    "One month post\n2nd dose"
  )
)

# Plot CD4_.Nonnaive.cTfh.ICOS..CD38.._FreqParent against timeCategory with the
# project helper prePostTime() (not defined in this part of the file;
# presumably it comes from the sourced COVIDvaccines_PlottingFunctions.R),
# filled by Prior.COVID.infection. and grouped by Record.ID.
# Logical flags are spelled TRUE/FALSE because T and F are ordinary variables
# that can be reassigned.
prePostTime(
  subsetData,
  xData = "timeCategory",
  yData = "CD4_.Nonnaive.cTfh.ICOS..CD38.._FreqParent",
  fillParam = "Prior.COVID.infection.",
  groupby = "Record.ID",
  title = "cTfh",
  xLabel = " ",
  yLabel = "ICOS+CD38+ (% cTfh)",
  repMeasures = FALSE,
  newform = TRUE
)
## [1] "block3"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)
## Warning: Removed 10 row(s) containing missing values (geom_path).
## Warning: Removed 11 rows containing missing values (geom_point).

# ggsave(filename = "./Images/cTfh_responses_bothCohorts_overTime.pdf")
# Bartlett test for equal variances of
# CD4_.Nonnaive.cTfh.ICOS..CD38.._FreqParent across time points, using only
# rows where Prior.COVID.infection. is "No".
bartlett.test(
  CD4_.Nonnaive.cTfh.ICOS..CD38.._FreqParent ~ timeCategory,
  data = subset(subsetData, Prior.COVID.infection. == "No")
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  CD4_.Nonnaive.cTfh.ICOS..CD38.._FreqParent by timeCategory
## Bartlett's K-squared = 0.22116, df = 4, p-value = 0.9943
# Pairwise Dunn comparisons of CD4_.Nonnaive.cTfh.ICOS..CD38.._FreqParent
# between time points, using only rows where Prior.COVID.infection. is "No".
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == "No"),
  formula = CD4_.Nonnaive.cTfh.ICOS..CD38.._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.       group1  group2      n1    n2 statistic       p   p.adj p.adj.signif
##  * <chr>     <chr>   <chr>    <int> <int>     <dbl>   <dbl>   <dbl> <chr>       
##  1 CD4_.Non~ Baseli~ "Post 1~    19    21     1.64  1.00e-1 4.02e-1 ns          
##  2 CD4_.Non~ Baseli~ "Pre 2n~    19    20     1.50  1.34e-1 4.02e-1 ns          
##  3 CD4_.Non~ Baseli~ "Post 2~    19    20     3.37  7.39e-4 6.66e-3 **          
##  4 CD4_.Non~ Baseli~ "One mo~    19    18     4.39  1.16e-5 1.16e-4 ***         
##  5 CD4_.Non~ Post 1~ "Pre 2n~    21    20    -0.127 8.99e-1 8.99e-1 ns          
##  6 CD4_.Non~ Post 1~ "Post 2~    21    20     1.80  7.26e-2 3.63e-1 ns          
##  7 CD4_.Non~ Post 1~ "One mo~    21    18     2.87  4.08e-3 2.86e-2 *           
##  8 CD4_.Non~ Pre 2n~ "Post 2~    20    20     1.90  5.76e-2 3.45e-1 ns          
##  9 CD4_.Non~ Pre 2n~ "One mo~    20    18     2.96  3.07e-3 2.46e-2 *           
## 10 CD4_.Non~ Post 2~ "One mo~    20    18     1.11  2.66e-1 5.32e-1 ns
# Bartlett test for equal variances of
# CD4_.Nonnaive.cTfh.ICOS..CD38.._FreqParent across time points, using only
# rows where Prior.COVID.infection. is "Yes".
bartlett.test(
  CD4_.Nonnaive.cTfh.ICOS..CD38.._FreqParent ~ timeCategory,
  data = subset(subsetData, Prior.COVID.infection. == "Yes")
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  CD4_.Nonnaive.cTfh.ICOS..CD38.._FreqParent by timeCategory
## Bartlett's K-squared = 2.0152, df = 4, p-value = 0.733
# Pairwise Dunn comparisons of CD4_.Nonnaive.cTfh.ICOS..CD38.._FreqParent
# between time points, using only rows where Prior.COVID.infection. is "Yes".
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == "Yes"),
  formula = CD4_.Nonnaive.cTfh.ICOS..CD38.._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.          group1  group2       n1    n2 statistic     p p.adj p.adj.signif
##  * <chr>        <chr>   <chr>     <int> <int>     <dbl> <dbl> <dbl> <chr>       
##  1 CD4_.Nonnai~ Baseli~ "Post 1s~    12    14     1.15  0.252     1 ns          
##  2 CD4_.Nonnai~ Baseli~ "Pre 2nd~    12    13     0.634 0.526     1 ns          
##  3 CD4_.Nonnai~ Baseli~ "Post 2n~    12    14     0.228 0.819     1 ns          
##  4 CD4_.Nonnai~ Baseli~ "One mon~    12    14     0.992 0.321     1 ns          
##  5 CD4_.Nonnai~ Post 1~ "Pre 2nd~    14    13    -0.512 0.609     1 ns          
##  6 CD4_.Nonnai~ Post 1~ "Post 2n~    14    14    -0.955 0.339     1 ns          
##  7 CD4_.Nonnai~ Post 1~ "One mon~    14    14    -0.160 0.873     1 ns          
##  8 CD4_.Nonnai~ Pre 2n~ "Post 2n~    13    14    -0.425 0.671     1 ns          
##  9 CD4_.Nonnai~ Pre 2n~ "One mon~    13    14     0.355 0.723     1 ns          
## 10 CD4_.Nonnai~ Post 2~ "One mon~    14    14     0.795 0.426     1 ns
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "CD4_.Nonnaive.cTfh.ICOS..CD38.._FreqParent")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig3B.csv")



# Same cTfh plot inputs as the plain prePostTime() call on this variable, but
# with recentCOVID = TRUE; the helper then reports "block3.1 recentCOVID"
# instead of "block3". What that mode changes is defined in prePostTime()
# itself (not shown in this part of the file).
# Logical flags are spelled TRUE/FALSE because T and F are ordinary variables
# that can be reassigned.
prePostTime(
  subsetData,
  xData = "timeCategory",
  yData = "CD4_.Nonnaive.cTfh.ICOS..CD38.._FreqParent",
  fillParam = "Prior.COVID.infection.",
  groupby = "Record.ID",
  title = "cTfh",
  xLabel = " ",
  yLabel = "ICOS+CD38+ (% cTfh)",
  repMeasures = FALSE,
  newform = TRUE,
  recentCOVID = TRUE
)
## [1] "block3.1 recentCOVID"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)
## Warning: Removed 10 row(s) containing missing values (geom_path).

## Warning: Removed 11 rows containing missing values (geom_point).

# ggsave(filename = "./Images/cTfh_responses_bothCohorts_overTime_recentCOVID.pdf")


# Rebuild the working set from mergedData: every row except those at the
# "two Weeks" or "2 wks post 2nd dose" time points.
subsetData <- subset(
  mergedData,
  !(timeCategory == "two Weeks" | timeCategory == "2 wks post 2nd dose")
)
# Re-create timeCategory as a factor with an explicit level order; values not
# listed in `levels` would become NA.
subsetData[["timeCategory"]] <- factor(
  subsetData[["timeCategory"]],
  levels = c(
    "Baseline",
    "Post 1st dose",
    "Pre 2nd dose",
    "Post 2nd dose",
    "One month post\n2nd dose"
  )
)
# Plot CD4_.Nonnaive.cTfh.ICOS..CD38...CXCR3._FreqParent against timeCategory
# with the project helper prePostTime() (not defined in this part of the file;
# presumably it comes from the sourced COVIDvaccines_PlottingFunctions.R),
# filled by Prior.COVID.infection. and grouped by Record.ID.
# Logical flags are spelled TRUE/FALSE because T and F are ordinary variables
# that can be reassigned.
prePostTime(
  data = subsetData,
  xData = "timeCategory",
  yData = "CD4_.Nonnaive.cTfh.ICOS..CD38...CXCR3._FreqParent",
  fillParam = "Prior.COVID.infection.",
  groupby = "Record.ID",
  title = "cTfh",
  xLabel = " ",
  yLabel = "CXCR3+ (% ICOS+CD38+ cTfh)",
  repMeasures = FALSE,
  newform = TRUE
) +
  # Fixed 0-80 axis with ticks every 10. NOTE(review): with ggplot2's default
  # out-of-bounds handling, values outside the limits are dropped from the
  # plot - confirm no observations exceed 80.
  scale_y_continuous(limits = c(0, 80), breaks = seq(0, 100, 10))
## [1] "block3"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)
## Warning: Removed 11 row(s) containing missing values (geom_path).
## Warning: Removed 12 rows containing missing values (geom_point).

# ggsave(filename = "./Images/cTfh_responses_CXCR3_bothCohorts_overTime.pdf")
# Bartlett test for equal variances of
# CD4_.Nonnaive.cTfh.ICOS..CD38...CXCR3._FreqParent across time points, using
# only rows where Prior.COVID.infection. is "No".
bartlett.test(
  CD4_.Nonnaive.cTfh.ICOS..CD38...CXCR3._FreqParent ~ timeCategory,
  data = subset(subsetData, Prior.COVID.infection. == "No")
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  CD4_.Nonnaive.cTfh.ICOS..CD38...CXCR3._FreqParent by timeCategory
## Bartlett's K-squared = 7.3798, df = 4, p-value = 0.1171
# Pairwise Dunn comparisons of
# CD4_.Nonnaive.cTfh.ICOS..CD38...CXCR3._FreqParent between time points, using
# only rows where Prior.COVID.infection. is "No".
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == "No"),
  formula = CD4_.Nonnaive.cTfh.ICOS..CD38...CXCR3._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.        group1  group2     n1    n2 statistic       p   p.adj p.adj.signif
##  * <chr>      <chr>   <chr>   <int> <int>     <dbl>   <dbl>   <dbl> <chr>       
##  1 CD4_.Nonn~ Baseli~ "Post ~    19    21    2.64   8.19e-3 0.0655  ns          
##  2 CD4_.Nonn~ Baseli~ "Pre 2~    19    20    3.46   5.47e-4 0.00493 **          
##  3 CD4_.Nonn~ Baseli~ "Post ~    19    20    3.51   4.46e-4 0.00446 **          
##  4 CD4_.Nonn~ Baseli~ "One m~    19    18    1.53   1.26e-1 0.628   ns          
##  5 CD4_.Nonn~ Post 1~ "Pre 2~    21    20    0.865  3.87e-1 1       ns          
##  6 CD4_.Nonn~ Post 1~ "Post ~    21    20    0.921  3.57e-1 1       ns          
##  7 CD4_.Nonn~ Post 1~ "One m~    21    18   -1.04   2.99e-1 1       ns          
##  8 CD4_.Nonn~ Pre 2n~ "Post ~    20    20    0.0556 9.56e-1 1       ns          
##  9 CD4_.Nonn~ Pre 2n~ "One m~    20    18   -1.86   6.32e-2 0.391   ns          
## 10 CD4_.Nonn~ Post 2~ "One m~    20    18   -1.91   5.59e-2 0.391   ns
# Homogeneity of variance of CXCR3+ cTfh across time points,
# subjects with prior COVID infection.
bartlett.test(
  CD4_.Nonnaive.cTfh.ICOS..CD38...CXCR3._FreqParent ~ timeCategory,
  data = subset(subsetData, Prior.COVID.infection. == 'Yes')
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  CD4_.Nonnaive.cTfh.ICOS..CD38...CXCR3._FreqParent by timeCategory
## Bartlett's K-squared = 12.894, df = 4, p-value = 0.0118
# tukey_hsd( aov(CD4_.Nonnaive.cTfh.ICOS..CD38...CXCR3._FreqParent ~ timeCategory, data=subset(subsetData, Prior.COVID.infection. == 'Yes')) )
# Kruskal-Wallis test of CXCR3+ cTfh across time points,
# subjects with prior COVID infection.
kruskal_test(
  data = subset(subsetData, Prior.COVID.infection. == 'Yes'),
  formula = CD4_.Nonnaive.cTfh.ICOS..CD38...CXCR3._FreqParent ~ timeCategory
)
## # A tibble: 1 x 6
##   .y.                                        n statistic    df     p method     
## * <chr>                                  <int>     <dbl> <int> <dbl> <chr>      
## 1 CD4_.Nonnaive.cTfh.ICOS..CD38...CXCR3~    74      5.83     4 0.212 Kruskal-Wa~
# Dunn's pairwise comparisons of CXCR3+ cTfh between time points,
# subjects with prior COVID infection.
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == 'Yes'),
  formula = CD4_.Nonnaive.cTfh.ICOS..CD38...CXCR3._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.          group1  group2      n1    n2 statistic      p p.adj p.adj.signif
##  * <chr>        <chr>   <chr>    <int> <int>     <dbl>  <dbl> <dbl> <chr>       
##  1 CD4_.Nonnai~ Baseli~ "Post 1~    12    14     2.18  0.0293 0.293 ns          
##  2 CD4_.Nonnai~ Baseli~ "Pre 2n~    12    13     0.757 0.449  1     ns          
##  3 CD4_.Nonnai~ Baseli~ "Post 2~    12    14     0.642 0.521  1     ns          
##  4 CD4_.Nonnai~ Baseli~ "One mo~    12    14     0.339 0.734  1     ns          
##  5 CD4_.Nonnai~ Post 1~ "Pre 2n~    14    13    -1.44  0.150  1     ns          
##  6 CD4_.Nonnai~ Post 1~ "Post 2~    14    14    -1.60  0.110  0.876 ns          
##  7 CD4_.Nonnai~ Post 1~ "One mo~    14    14    -1.92  0.0554 0.499 ns          
##  8 CD4_.Nonnai~ Pre 2n~ "Post 2~    13    14    -0.130 0.896  1     ns          
##  9 CD4_.Nonnai~ Pre 2n~ "One mo~    13    14    -0.440 0.660  1     ns          
## 10 CD4_.Nonnai~ Post 2~ "One mo~    14    14    -0.315 0.753  1     ns
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "CD4_.Nonnaive.cTfh.ICOS..CD38...CXCR3._FreqParent")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig3D.csv")


# Same CXCR3+ cTfh time course as above, drawn in prePostTime()'s recentCOVID
# mode; y axis clipped to 0-80.
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be reassigned.
prePostTime(
  data = subsetData,
  xData = "timeCategory",
  yData = "CD4_.Nonnaive.cTfh.ICOS..CD38...CXCR3._FreqParent",
  fillParam = "Prior.COVID.infection.",
  groupby = "Record.ID",
  title = "cTfh",
  xLabel = " ",
  yLabel = "CXCR3+ (% ICOS+CD38+ cTfh)",
  repMeasures = FALSE,
  newform = TRUE,
  recentCOVID = TRUE
) +
  scale_y_continuous(limits = c(0, 80), breaks = seq(0, 100, 10))
## [1] "block3.1 recentCOVID"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)
## Warning: Removed 11 row(s) containing missing values (geom_path).

## Warning: Removed 12 rows containing missing values (geom_point).

# ggsave(filename = "./Images/cTfh_responses_CXCR3_bothCohorts_overTime_recentCOVID.pdf")




# Rebuild the plotting subset: drop the two unused time points and fix the
# order of the remaining ones.
subsetData <- subset(mergedData, timeCategory != "two Weeks" & timeCategory != "2 wks post 2nd dose")
subsetData$timeCategory <- factor(
  subsetData$timeCategory,
  levels = c("Baseline", "Post 1st dose", "Pre 2nd dose", "Post 2nd dose", "One month post\n2nd dose")
)

# CD69+CD200+ fraction of activated cTfh (AIM assay column) over time; y axis 0-10.
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be reassigned.
prePostTime(
  subsetData,
  xData = "timeCategory",
  yData = "AIM_CD4.CXCR5.PD1..CD38hi.CD69.CD200._FreqParent",
  fillParam = "Prior.COVID.infection.",
  groupby = "Record.ID",
  title = "Activated cTfh",
  xLabel = " ",
  yLabel = "CD69+CD200+ (% activated cTfh)",
  repMeasures = FALSE,
  exponential = FALSE,
  newform = TRUE
) +
  scale_y_continuous(limits = c(0, 10), breaks = seq(0, 10, 1))
## [1] "block3"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)
## Warning: Removed 118 row(s) containing missing values (geom_path).
## Warning: Removed 118 rows containing missing values (geom_point).

# ggsave(filename = "./Images/AIM_cTfh_69-200_overTime.pdf")
# Homogeneity of variance of CD69+CD200+ activated cTfh across time points,
# subjects without prior COVID infection.
bartlett.test(
  AIM_CD4.CXCR5.PD1..CD38hi.CD69.CD200._FreqParent ~ timeCategory,
  data = subset(subsetData, Prior.COVID.infection. == 'No')
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  AIM_CD4.CXCR5.PD1..CD38hi.CD69.CD200._FreqParent by timeCategory
## Bartlett's K-squared = 9.6695, df = 4, p-value = 0.04638
# Kruskal-Wallis test of CD69+CD200+ activated cTfh across time points,
# subjects without prior COVID infection.
kruskal_test(
  data = subset(subsetData, Prior.COVID.infection. == 'No'),
  formula = AIM_CD4.CXCR5.PD1..CD38hi.CD69.CD200._FreqParent ~ timeCategory
)
## # A tibble: 1 x 6
##   .y.                                       n statistic    df      p method     
## * <chr>                                 <int>     <dbl> <int>  <dbl> <chr>      
## 1 AIM_CD4.CXCR5.PD1..CD38hi.CD69.CD200~   102      12.4     4 0.0147 Kruskal-Wa~
# Dunn's pairwise comparisons of CD69+CD200+ activated cTfh between time points,
# subjects without prior COVID infection.
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == 'No'),
  formula = AIM_CD4.CXCR5.PD1..CD38hi.CD69.CD200._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.        group1  group2     n1    n2 statistic       p   p.adj p.adj.signif
##  * <chr>      <chr>   <chr>   <int> <int>     <dbl>   <dbl>   <dbl> <chr>       
##  1 AIM_CD4.C~ Baseli~ "Post ~     6     6     1.06  2.91e-1 1       ns          
##  2 AIM_CD4.C~ Baseli~ "Pre 2~     6     6     2.05  4.08e-2 0.326   ns          
##  3 AIM_CD4.C~ Baseli~ "Post ~     6     6     3.37  7.63e-4 0.00763 **          
##  4 AIM_CD4.C~ Baseli~ "One m~     6     6     1.78  7.48e-2 0.523   ns          
##  5 AIM_CD4.C~ Post 1~ "Pre 2~     6     6     0.990 3.22e-1 1       ns          
##  6 AIM_CD4.C~ Post 1~ "Post ~     6     6     2.31  2.09e-2 0.188   ns          
##  7 AIM_CD4.C~ Post 1~ "One m~     6     6     0.726 4.68e-1 1       ns          
##  8 AIM_CD4.C~ Pre 2n~ "Post ~     6     6     1.32  1.87e-1 0.934   ns          
##  9 AIM_CD4.C~ Pre 2n~ "One m~     6     6    -0.264 7.92e-1 1       ns          
## 10 AIM_CD4.C~ Post 2~ "One m~     6     6    -1.58  1.13e-1 0.679   ns
# Homogeneity of variance of CD69+CD200+ activated cTfh across time points,
# subjects with prior COVID infection.
bartlett.test(
  AIM_CD4.CXCR5.PD1..CD38hi.CD69.CD200._FreqParent ~ timeCategory,
  data = subset(subsetData, Prior.COVID.infection. == 'Yes')
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  AIM_CD4.CXCR5.PD1..CD38hi.CD69.CD200._FreqParent by timeCategory
## Bartlett's K-squared = 1.9833, df = 4, p-value = 0.7388
# Kruskal-Wallis test of CD69+CD200+ activated cTfh across time points,
# subjects with prior COVID infection.
kruskal_test(
  data = subset(subsetData, Prior.COVID.infection. == 'Yes'),
  formula = AIM_CD4.CXCR5.PD1..CD38hi.CD69.CD200._FreqParent ~ timeCategory
)
## # A tibble: 1 x 6
##   .y.                                        n statistic    df     p method     
## * <chr>                                  <int>     <dbl> <int> <dbl> <chr>      
## 1 AIM_CD4.CXCR5.PD1..CD38hi.CD69.CD200.~    74      4.95     4 0.293 Kruskal-Wa~
# Dunn's pairwise comparisons of CD69+CD200+ activated cTfh between time points,
# subjects with prior COVID infection.
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == 'Yes'),
  formula = AIM_CD4.CXCR5.PD1..CD38hi.CD69.CD200._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.          group1  group2      n1    n2 statistic      p p.adj p.adj.signif
##  * <chr>        <chr>   <chr>    <int> <int>     <dbl>  <dbl> <dbl> <chr>       
##  1 AIM_CD4.CXC~ Baseli~ "Post 1~     4     6    1.41   0.159  1     ns          
##  2 AIM_CD4.CXC~ Baseli~ "Pre 2n~     4     6    1.33   0.184  1     ns          
##  3 AIM_CD4.CXC~ Baseli~ "Post 2~     4     6    0.622  0.534  1     ns          
##  4 AIM_CD4.CXC~ Baseli~ "One mo~     4     6   -0.165  0.869  1     ns          
##  5 AIM_CD4.CXC~ Post 1~ "Pre 2n~     6     6   -0.0880 0.930  1     ns          
##  6 AIM_CD4.CXC~ Post 1~ "Post 2~     6     6   -0.880  0.379  1     ns          
##  7 AIM_CD4.CXC~ Post 1~ "One mo~     6     6   -1.76   0.0785 0.785 ns          
##  8 AIM_CD4.CXC~ Pre 2n~ "Post 2~     6     6   -0.792  0.428  1     ns          
##  9 AIM_CD4.CXC~ Pre 2n~ "One mo~     6     6   -1.67   0.0946 0.851 ns          
## 10 AIM_CD4.CXC~ Post 2~ "One mo~     6     6   -0.880  0.379  1     ns
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "AIM_CD4.CXCR5.PD1..CD38hi.CD69.CD200._FreqParent")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig3F.csv")


# Per-subject fold change (post 2nd dose / baseline) of CD69+CD200+ activated
# cTfh, summarised separately for each prior-infection group.
subsetData <- subset(mergedData, timeCategory %in% c("Baseline", "Post 2nd dose"))
# One row per subject, one column per time point.
FC_response2 <- dcast(
  subsetData,
  Record.ID + Prior.COVID.infection. ~ timeCategory,
  value.var = "AIM_CD4.CXCR5.PD1..CD38hi.CD69.CD200._FreqParent"
)
FC_response2$FoldChange <- FC_response2$`Post 2nd dose` / FC_response2$`Baseline`
FC_response2$Cohort <- NULL
# Only infinite ratios (zero baseline) are removed; NA/NaN ratios stay in the table.
keepRows <- !is.infinite(FC_response2$FoldChange)
FC_response2 <- FC_response2[keepRows, ]
get_summary_stats(group_by(FC_response2, Prior.COVID.infection.), type = "common")
## # A tibble: 6 x 11
##   Prior.COVID.inf~ variable     n   min   max median    iqr  mean     sd     se
##   <chr>            <chr>    <dbl> <dbl> <dbl>  <dbl>  <dbl> <dbl>  <dbl>  <dbl>
## 1 No               Baseline     2 0.290  1.75  1.02   0.73   1.02  1.03   0.73 
## 2 No               FoldCha~     2 1.83  27.2  14.5   12.7   14.5  17.9   12.7  
## 3 No               Post 2n~     2 3.21   7.89  5.55   2.34   5.55  3.31   2.34 
## 4 Yes              Baseline     3 0.47   2.04  1.4    0.785  1.30  0.789  0.456
## 5 Yes              FoldCha~     3 0.441  3.02  0.471  1.29   1.31  1.48   0.855
## 6 Yes              Post 2n~     5 0.66   3.39  1.42   0.89   1.63  1.08   0.482
## # ... with 1 more variable: ci <dbl>

———————————— Age correlations with Tfh responses ——————————————

# Age vs ICOS+CD38+ cTfh frequency after the first dose, one series per
# prior-infection group (HEP tube rows only).
# nonparam is spelled TRUE: T is an ordinary variable that can be reassigned.
bivScatter(
  data1 = subset(mergedData, Prior.COVID.infection. == 'No' & timeCategory == "Post 1st dose" & Tube == "HEP"),
  name1 = "Naive",
  data2 = subset(mergedData, Prior.COVID.infection. == 'Yes' & timeCategory == "Post 1st dose" & Tube == "HEP"),
  name2 = "Experienced",
  xData = "Age",
  yData = "CD4_.Nonnaive.cTfh.ICOS..CD38.._FreqParent",
  fillParam = "Prior.COVID.infection.",
  title = "Post 1st dose",
  xLabel = "Age",
  yLabel = "ICOS+CD38+ (% cTfh)",
  nonparam = TRUE
) +
  scale_y_continuous(breaks = seq(0, 27, 3), limits = c(0, 25)) +
  scale_x_continuous(limits = c(20, 70))
## Warning in cor.test.default(data1[, xData], data1[, yData], method = "kendall"):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(data2[, xData], data2[, yData], method = "kendall"):
## Cannot compute exact p-value with ties
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

# ggsave(filename = "./Images/Age_correl_HiHiTfh_Vax1.pdf", width=8)
# Age vs ICOS+CD38+ cTfh frequency after the second dose, one series per
# prior-infection group (HEP tube rows only).
# nonparam is spelled TRUE: T is an ordinary variable that can be reassigned.
bivScatter(
  data1 = subset(mergedData, Prior.COVID.infection. == 'No' & timeCategory == "Post 2nd dose" & Tube == "HEP"),
  name1 = "Naive",
  data2 = subset(mergedData, Prior.COVID.infection. == 'Yes' & timeCategory == "Post 2nd dose" & Tube == "HEP"),
  name2 = "Experienced",
  xData = "Age",
  yData = "CD4_.Nonnaive.cTfh.ICOS..CD38.._FreqParent",
  fillParam = "Prior.COVID.infection.",
  title = "Post 2nd dose",
  xLabel = "Age",
  yLabel = "ICOS+CD38+ (% cTfh)",
  nonparam = TRUE
) +
  scale_y_continuous(breaks = seq(0, 27, 3), limits = c(0, 25)) +
  scale_x_continuous(limits = c(20, 70))
## Warning in cor.test.default(data1[, xData], data1[, yData], method = "kendall"):
## Cannot compute exact p-value with ties

## Warning in cor.test.default(data1[, xData], data1[, yData], method = "kendall"):
## Cannot compute exact p-value with ties
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 12 rows containing missing values (geom_smooth).

# ggsave(filename = "./Images/Age_correl_HiHiTfh_Vax2.pdf", width=8)

# Age vs fold-change in ICOS+CD38+ cTfh (FCtfh_Vax1), one series per
# prior-infection group (HEP tube rows only); y axis clipped to 0-6.
# nonparam is spelled TRUE: T is an ordinary variable that can be reassigned.
bivScatter(
  data1 = subset(mergedData, Prior.COVID.infection. == 'No' & timeCategory == "Post 1st dose" & Tube == "HEP"),
  name1 = "Naive",
  data2 = subset(mergedData, Prior.COVID.infection. == 'Yes' & timeCategory == "Post 1st dose" & Tube == "HEP"),
  name2 = "Experienced",
  xData = "Age",
  yData = "FCtfh_Vax1",
  fillParam = "Prior.COVID.infection.",
  title = "Post 1st dose",
  xLabel = "Age",
  yLabel = "Fold-change ICOS+CD38+ cTfh",
  nonparam = TRUE
) +
  scale_y_continuous(breaks = seq(0, 30, 1), limits = c(0, 6)) +
  scale_x_continuous(limits = c(20, 70))
## Warning in cor.test.default(data1[, xData], data1[, yData], method = "kendall"):
## Cannot compute exact p-value with ties

## Warning in cor.test.default(data1[, xData], data1[, yData], method = "kendall"):
## Cannot compute exact p-value with ties
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).

## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_smooth).

# ggsave(filename = "./Images/Age_correl_FCtfh_Vax1.pdf", width=8)
# Age vs fold-change in ICOS+CD38+ cTfh (FCtfh_Vax2), one series per
# prior-infection group (HEP tube rows only); y axis clipped to 0-6.
# nonparam is spelled TRUE: T is an ordinary variable that can be reassigned.
bivScatter(
  data1 = subset(mergedData, Prior.COVID.infection. == 'No' & timeCategory == "Post 2nd dose" & Tube == "HEP"),
  name1 = "Naive",
  data2 = subset(mergedData, Prior.COVID.infection. == 'Yes' & timeCategory == "Post 2nd dose" & Tube == "HEP"),
  name2 = "Experienced",
  xData = "Age",
  yData = "FCtfh_Vax2",
  fillParam = "Prior.COVID.infection.",
  title = "Post 2nd dose",
  xLabel = "Age",
  yLabel = "Fold-change ICOS+CD38+ cTfh",
  nonparam = TRUE
) +
  scale_y_continuous(breaks = seq(0, 30, 1), limits = c(0, 6)) +
  scale_x_continuous(limits = c(20, 70))
## Warning in cor.test.default(data1[, xData], data1[, yData], method = "kendall"):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(data2[, xData], data2[, yData], method = "kendall"):
## Cannot compute exact p-value with ties
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (geom_point).

# ggsave(filename = "./Images/Age_correl_FCtfh_Vax2.pdf", width=8)



# Kendall correlations (rounded to 2 decimals) between all columns whose name
# starts with "FC", using the post-1st-dose rows.
# NOTE(review): cor.matrix is reassigned further down before any visible use of
# this result - confirm whether this exploratory step is still needed.
subsetData <- subset(mergedData, timeCategory == "Post 1st dose")
fcColumns <- grep(paste(c("^FC"), collapse = "|"), names(subsetData))
subsetData <- subsetData[, fcColumns]
cor.matrix <- round(
  cor(subsetData, method = "kendall", use = "pairwise.complete.obs"),
  2
)

# Fold-change in S1 IgG (FC_IgG_S1_postVax1) vs fold-change in CXCR3+ cTfh
# (FCtfh_CXCR3_Vax1) after the first dose, both axes on a pseudo-log scale.
# The axis labels previously repeated those of the activated-CD4 vs cTfh plot and
# did not describe the plotted columns; they now follow the column names.
# NOTE(review): label wording is inferred from the column names - confirm.
# nonparam is spelled TRUE: T is an ordinary variable that can be reassigned.
bivScatter(
  data1 = subset(mergedData, Prior.COVID.infection. == 'No' & timeCategory == "Post 1st dose"),
  name1 = "Naive",
  data2 = subset(mergedData, Prior.COVID.infection. == 'Yes' & timeCategory == "Post 1st dose"),
  name2 = "Experienced",
  xData = "FC_IgG_S1_postVax1",
  yData = "FCtfh_CXCR3_Vax1",
  fillParam = "Prior.COVID.infection.",
  title = "Post 1st dose",
  xLabel = "Fold-change anti-S1 IgG",
  yLabel = "Fold-change CXCR3+ ICOS+CD38+ cTfh",
  nonparam = TRUE
) +
  scale_x_continuous(trans = 'pseudo_log', breaks = c(10^(0:4)), labels = trans_format('log10', math_format(10^.x)), minor_breaks = 5 * 10^(0:10)) +
  scale_y_continuous(trans = 'pseudo_log', breaks = c(10^(0:4)), labels = trans_format('log10', math_format(10^.x)), minor_breaks = 5 * 10^(0:10))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_point).

# Post-1st-dose rows of subjects without prior COVID infection, restricted to
# the cTfh frequency, the FCtfh* fold-change columns and Age.
subsetData <- subset(mergedData, Prior.COVID.infection. == 'No' & timeCategory == "Post 1st dose")
keepColumns <- grepl(
  paste(c("CD4_.Nonnaive.cTfh.ICOS..CD38.._FreqParent", "^FCtfh", "Age"), collapse = "|"),
  names(subsetData)
)
subsetData <- subsetData[, keepColumns]
# Kendall's tau and the matching p-values for every pair of columns.
cor.matrix <- cor(subsetData, method = "kendall", use = "pairwise.complete.obs")
cor.matrix.pmat <- ggcorrplot::cor_pmat(subsetData, method = "kendall", use = "pairwise.complete.obs")
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
# Turn the "No" (no prior COVID) correlation matrix into a table: keep the row
# names as labels, tag the cohort, and append the p-value of each variable's
# correlation with Age.
cor.matrix <- as.data.frame(cor.matrix)
cor.matrix$Labels <- row.names(cor.matrix)
cor.matrix$Prior.COVID <- "No"
cor.matrix <- merge(x = cor.matrix, y = cor.matrix.pmat[, "Age"], by = "row.names")
# The appended p-value column comes out of merge() named "y". Match that name
# exactly: grep("y", ...) would also rename any other column containing a "y".
names(cor.matrix)[names(cor.matrix) == "y"] <- "Pvalue"

# Same inputs for subjects with prior COVID infection.
subsetData <- subset(mergedData, Prior.COVID.infection. == 'Yes' & timeCategory == "Post 1st dose")
subsetData <- subsetData[, grep(paste(c("CD4_.Nonnaive.cTfh.ICOS..CD38.._FreqParent", "^FCtfh", "Age"), collapse = "|"), names(subsetData))]
cor.matrix2 <- cor(subsetData, method = "kendall", use = "pairwise.complete.obs")
cor.matrix2.pmat <- ggcorrplot::cor_pmat(subsetData, method = "kendall", use = "pairwise.complete.obs")
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
# Turn the "Yes" (prior COVID) correlation matrix into a table: labels, cohort
# tag, and the p-value of each variable's correlation with Age.
cor.matrix2 <- as.data.frame(cor.matrix2)
cor.matrix2$Labels <- row.names(cor.matrix2)
cor.matrix2$Prior.COVID <- "Yes"
cor.matrix2 <- merge(x = cor.matrix2, y = cor.matrix2.pmat[, "Age"], by = "row.names")
# The appended p-value column comes out of merge() named "y". Match that name
# exactly: grep("y", ...) would also rename any other column containing a "y".
names(cor.matrix2)[names(cor.matrix2) == "y"] <- "Pvalue"

# Stack both cohorts and drop the Age / Foxp3 / CXCR3 rows. A logical mask keeps
# every row when nothing matches, whereas x[-grep(...), ] would return no rows.
temp <- as.data.frame(rbind(cor.matrix, cor.matrix2))
temp <- temp[!grepl(paste(c("Age", "Foxp3", "CXCR3"), collapse = "|"), temp$Row.names), ]
# 
# ggplot( data = temp, aes(y = Labels,x = Age, fill = Prior.COVID)) + geom_bar(stat='identity',position = 'dodge',width=0.75) + theme_bw() +
#   scale_fill_manual(values=c("#FFDFB1", "#B5B2F1")) + xlab("Correlation with Age") + ylab(" ") + theme(axis.text.y = element_text(angle=0, size = 10)) +
#   ggtitle("Post 1st dose") + geom_vline(xintercept=0, linetype = "dashed") + scale_x_continuous(limits = c(-1,1))
#   

# Post-2nd-dose inputs, subjects without prior COVID infection: cTfh frequency and Age only.
subsetData <- subset(mergedData, Prior.COVID.infection. == 'No' & timeCategory == "Post 2nd dose")
subsetData <- subsetData[, grep(paste(c("CD4_.Nonnaive.cTfh.ICOS..CD38.._FreqParent", "Age"), collapse = "|"), names(subsetData))]
cor.matrix <- cor(subsetData, method = "kendall", use = "pairwise.complete.obs")
cor.matrix.pmat <- ggcorrplot::cor_pmat(subsetData, method = "kendall", use = "pairwise.complete.obs")
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
# Post-2nd-dose table for subjects without prior COVID infection: labels,
# cohort tag, and the p-value of each variable's correlation with Age.
cor.matrix <- as.data.frame(cor.matrix)
cor.matrix$Labels <- row.names(cor.matrix)
cor.matrix$Prior.COVID <- "No"
cor.matrix <- merge(x = cor.matrix, y = cor.matrix.pmat[, "Age"], by = "row.names")
# The appended p-value column comes out of merge() named "y". Match that name
# exactly: grep("y", ...) would also rename any other column containing a "y".
names(cor.matrix)[names(cor.matrix) == "y"] <- "Pvalue"

# Same inputs for subjects with prior COVID infection.
subsetData <- subset(mergedData, Prior.COVID.infection. == 'Yes' & timeCategory == "Post 2nd dose")
subsetData <- subsetData[, grep(paste(c("CD4_.Nonnaive.cTfh.ICOS..CD38.._FreqParent", "Age"), collapse = "|"), names(subsetData))]
cor.matrix2 <- cor(subsetData, method = "kendall", use = "pairwise.complete.obs")
cor.matrix2.pmat <- ggcorrplot::cor_pmat(subsetData, method = "kendall", use = "pairwise.complete.obs")
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
# Post-2nd-dose table for subjects with prior COVID infection: labels, cohort
# tag, and the p-value of each variable's correlation with Age.
cor.matrix2 <- as.data.frame(cor.matrix2)
cor.matrix2$Labels <- row.names(cor.matrix2)
cor.matrix2$Prior.COVID <- "Yes"
cor.matrix2 <- merge(x = cor.matrix2, y = cor.matrix2.pmat[, "Age"], by = "row.names")
# The appended p-value column comes out of merge() named "y". Match that name
# exactly: grep("y", ...) would also rename any other column containing a "y".
names(cor.matrix2)[names(cor.matrix2) == "y"] <- "Pvalue"


# Stack both cohorts and drop the Age / Foxp3 / CXCR3 rows. A logical mask keeps
# every row when nothing matches, whereas x[-grep(...), ] would return no rows.
temp2 <- as.data.frame(rbind(cor.matrix, cor.matrix2))
temp2 <- temp2[!grepl(paste(c("Age", "Foxp3", "CXCR3"), collapse = "|"), temp2$Row.names), ]
# 
# ggplot( data = temp2, aes(y = Labels,x = Age, fill = Prior.COVID)) + geom_bar(stat='identity',position = 'dodge',width=0.75) + theme_bw() + 
#   scale_fill_manual(values=c("#FFDFB1", "#B5B2F1")) + xlab("Correlation with Age") + ylab(" ") + theme(axis.text.y = element_text(angle=0, size = 10)) + 
#   ggtitle("Post 2nd dose") + geom_vline(xintercept=0, linetype = "dashed") +  scale_x_continuous(limits = c(-1,1))

# temp <- x; temp2 <- y
# Human-readable row labels: temp holds the post-1st-dose tables, temp2 the
# post-2nd-dose tables.
temp[grep("CD4_.Nonnaive.cTfh.ICOS..CD38.._FreqParent", temp$Row.names), "Labels"] <- "Frequency\npost 1st dose"
temp2[grep("CD4_.Nonnaive.cTfh.ICOS..CD38.._FreqParent", temp2$Row.names), "Labels"] <- "Frequency\npost 2nd dose"
temp[grep("FCtfh_Vax1", temp$Row.names), "Labels"] <- "Fold-change\npost 1st dose"
temp[grep("FCtfh_Vax2", temp$Row.names), "Labels"] <- "Fold-change\npost 2nd dose"
# Keep the shared columns so that the two tables can be stacked.
temp <- temp[, c("Row.names", "Age", "Labels", "Prior.COVID", "Pvalue")]
temp2 <- temp2[, c("Row.names", "Age", "Labels", "Prior.COVID", "Pvalue")]
temp <- as.data.frame(rbind(temp, temp2))
temp$Labels <- factor(temp$Labels, levels = c("Fold-change\npost 2nd dose", "Fold-change\npost 1st dose", "Frequency\npost 2nd dose", "Frequency\npost 1st dose"))
# temp <- temp[c(1:6,8,7),]
# Lollipop chart of Kendall's tau with Age: thin bars as stems, points as heads.
# The size range is reversed (8 -> 1), so smaller p-values give larger points.
ggplot(data = temp, aes(x = Labels, y = Age, fill = Prior.COVID)) +
  geom_bar(stat = 'identity', position = position_dodge(width = 0.5), width = 0.05, color = "black", size = 0.1) +
  geom_point(aes(fill = Prior.COVID, size = Pvalue), pch = 21, color = "black", stroke = 0.2, position = position_dodge(width = 0.5)) +
  theme_bw() +
  scale_size(range = c(8, 1), breaks = c(0, 0.05, 0.1, 0.2, 0.7), limits = c(0, 0.8), trans = 'pseudo_log') +
  guides(size = guide_legend(reverse = TRUE)) +
  scale_fill_manual(values = c("#FFC26A", "#B5B2F1")) +
  ylab("Kendall's tau vs Age") +
  xlab(" ") +
  ggtitle("ICOS+CD38+ cTfh") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme(
    axis.text.y = element_text(size = 16, color = "black"),
    plot.title = element_text(size = 24),
    axis.text.x = element_text(size = 16, color = "black", angle = 45, hjust = 1, vjust = 1),
    axis.title.x = element_text(size = 16, color = "black")
  ) +
  coord_flip() +
  scale_y_continuous(limits = c(-1, 0.5), breaks = seq(-1, 1, 0.25))

# ggsave(filename = "./Images/Age_Tfhcorrelations_lollipop.pdf", width=5, height = 5)

# Fold-change in activated CD4 (FCActivCD4_Vax1) vs fold-change in ICOS+CD38+
# cTfh (FCtfh_Vax1), both axes on a pseudo-log scale; y axis limited to 0-20.
# nonparam is spelled TRUE: T is an ordinary variable that can be reassigned.
bivScatter(
  data1 = subset(mergedData, Prior.COVID.infection. == 'No' & timeCategory == "Post 1st dose"),
  name1 = "Naive",
  data2 = subset(mergedData, Prior.COVID.infection. == 'Yes' & timeCategory == "Post 1st dose"),
  name2 = "Experienced",
  xData = "FCActivCD4_Vax1",
  yData = "FCtfh_Vax1",
  fillParam = "Prior.COVID.infection.",
  title = "Post 1st dose",
  xLabel = "Fold-change CD4+Ki67+CD38+",
  yLabel = "Fold-change ICOS+CD38+ cTfh",
  nonparam = TRUE
) +
  scale_x_continuous(trans = 'pseudo_log', breaks = c(10^(0:4)), labels = trans_format('log10', math_format(10^.x)), minor_breaks = 5 * 10^(0:10)) +
  scale_y_continuous(trans = 'pseudo_log', breaks = c(10^(0:4)), limits = c(0, 20), labels = trans_format('log10', math_format(10^.x)), minor_breaks = 5 * 10^(0:10))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_point).

# ggsave(filename = "./Images/FCTfh_correl_FCactivCD4_Vax1.pdf", width=8)

# Fold-change in activated CD4 (FCActivCD4_Vax2) vs fold-change in ICOS+CD38+
# cTfh (FCtfh_Vax2), both axes on a pseudo-log scale.
# NOTE(review): the rows are taken from "Post 1st dose" although the title and
# both columns refer to the 2nd dose, and the Age vs FCtfh_Vax2 plot uses
# "Post 2nd dose" rows - confirm which time point is intended.
# nonparam is spelled TRUE: T is an ordinary variable that can be reassigned.
bivScatter(
  data1 = subset(mergedData, Prior.COVID.infection. == 'No' & timeCategory == "Post 1st dose"),
  name1 = "Naive",
  data2 = subset(mergedData, Prior.COVID.infection. == 'Yes' & timeCategory == "Post 1st dose"),
  name2 = "Experienced",
  xData = "FCActivCD4_Vax2",
  yData = "FCtfh_Vax2",
  fillParam = "Prior.COVID.infection.",
  title = "Post 2nd dose",
  xLabel = "Fold-change CD4+Ki67+CD38+",
  yLabel = "Fold-change ICOS+CD38+ cTfh",
  nonparam = TRUE
) +
  scale_x_continuous(trans = 'pseudo_log', breaks = c(10^(0:4)), labels = trans_format('log10', math_format(10^.x)), minor_breaks = 5 * 10^(0:10)) +
  scale_y_continuous(trans = 'pseudo_log', breaks = c(10^(0:4)), labels = trans_format('log10', math_format(10^.x)), minor_breaks = 5 * 10^(0:10))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).

## Warning: Removed 2 rows containing missing values (geom_point).

# ggsave(filename = "./Images/FCTfh_correl_FCactivCD4_Vax2.pdf", width=8)

—————— B cell analyses —————————

# Rebuild the plotting subset: drop the two unused time points and fix the
# order of the remaining ones.
subsetData <- subset(mergedData, timeCategory != "two Weeks" & timeCategory != "2 wks post 2nd dose")
subsetData$timeCategory <- factor(
  subsetData$timeCategory,
  levels = c("Baseline", "Post 1st dose", "Pre 2nd dose", "Post 2nd dose", "One month post\n2nd dose")
)
# CD27+CD38+ plasmablasts (% of CD19) against DPV, with a dashed marker at DPV = 0.
# NOTE(review): the x axis shows DPV but is labelled "Prior COVID?" - confirm the
# intended label.
# repMeasures is spelled FALSE: F is an ordinary variable that can be reassigned.
prePostTime(
  subsetData,
  xData = "DPV",
  yData = "CD19_.CD27..CD38._FreqParent",
  fillParam = "Prior.COVID.infection.",
  groupby = "Record.ID",
  title = "PB",
  xLabel = "Prior COVID?",
  yLabel = "CD27+CD38+ (% CD19)",
  repMeasures = FALSE
) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.5) +
  coord_cartesian(xlim = c(-1, 12))
## [1] "block2"
## Warning: Removed 10 row(s) containing missing values (geom_path).
## Warning: Removed 11 rows containing missing values (geom_point).

# Shift DPV by the interval between the two vaccine dates, so that 0 falls on
# the second dose. The interval is converted to a plain number of days:
# subtracting the raw difftime turns DPV into a difftime, for which ggplot2
# cannot pick a scale automatically and falls back with a message.
doseInterval <- as.numeric(difftime(subsetData$Vaccine.2.date, subsetData$Vaccine.1.date, units = "days"))
subsetData$DPV <- subsetData$DPV - doseInterval
# NOTE(review): the x axis shows DPV but is labelled "Prior COVID?" - confirm the
# intended label.
# repMeasures is spelled FALSE: F is an ordinary variable that can be reassigned.
prePostTime(
  subsetData,
  xData = "DPV",
  yData = "CD19_.CD27..CD38._FreqParent",
  fillParam = "Prior.COVID.infection.",
  groupby = "Record.ID",
  title = "PB",
  xLabel = "Prior COVID?",
  yLabel = "CD27+CD38+ (% CD19)",
  repMeasures = FALSE
) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.5) +
  coord_cartesian(xlim = c(-5, 12))
## [1] "block2"
## Don't know how to automatically pick scale for object of type difftime. Defaulting to continuous.
## Warning: Removed 10 row(s) containing missing values (geom_path).

## Warning: Removed 11 rows containing missing values (geom_point).

# Plotting subset for plasmablasts: drop the two unused time points, keep only
# rows from the HEP tube, and fix the order of the time points.
subsetData <- subset(mergedData, timeCategory != "two Weeks" & timeCategory != "2 wks post 2nd dose")
subsetData <- subsetData[which(subsetData$Tube == "HEP"), ]
subsetData$timeCategory <- factor(
  subsetData$timeCategory,
  levels = c("Baseline", "Post 1st dose", "Pre 2nd dose", "Post 2nd dose", "One month post\n2nd dose")
)
# CD27+CD38+ plasmablasts (% of CD19) over the vaccination time course.
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be reassigned.
prePostTime(
  subsetData,
  xData = "timeCategory",
  yData = "CD19_.CD27..CD38._FreqParent",
  fillParam = "Prior.COVID.infection.",
  groupby = "Record.ID",
  title = "Plasmablasts",
  xLabel = " ",
  yLabel = "CD27+CD38+ (% CD19)",
  repMeasures = FALSE,
  newform = TRUE
) #  + ggrepel::geom_text_repel(aes(label=Record.ID),size=2)
## [1] "block3"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)

# ggsave(filename = "./Images/PB_responses_bothCohorts_overTime.pdf")

# Homogeneity of variance of plasmablast frequency across time points,
# subjects without prior COVID infection.
bartlett.test(
  CD19_.CD27..CD38._FreqParent ~ timeCategory,
  data = subset(subsetData, Prior.COVID.infection. == 'No')
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  CD19_.CD27..CD38._FreqParent by timeCategory
## Bartlett's K-squared = 15.93, df = 4, p-value = 0.003115
# Kruskal-Wallis test of plasmablast frequency across time points,
# subjects without prior COVID infection.
kruskal_test(
  data = subset(subsetData, Prior.COVID.infection. == 'No'),
  formula = CD19_.CD27..CD38._FreqParent ~ timeCategory
)
## # A tibble: 1 x 6
##   .y.                              n statistic    df     p method        
## * <chr>                        <int>     <dbl> <int> <dbl> <chr>         
## 1 CD19_.CD27..CD38._FreqParent    98      2.54     4 0.637 Kruskal-Wallis
# Dunn's pairwise comparisons of plasmablast frequency between time points,
# subjects without prior COVID infection.
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == 'No'),
  formula = CD19_.CD27..CD38._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.         group1  group2        n1    n2 statistic     p p.adj p.adj.signif
##  * <chr>       <chr>   <chr>      <int> <int>     <dbl> <dbl> <dbl> <chr>       
##  1 CD19_.CD27~ Baseli~ "Post 1st~    19    21    0.665  0.506     1 ns          
##  2 CD19_.CD27~ Baseli~ "Pre 2nd ~    19    20    0.0470 0.963     1 ns          
##  3 CD19_.CD27~ Baseli~ "Post 2nd~    19    20   -0.338  0.736     1 ns          
##  4 CD19_.CD27~ Baseli~ "One mont~    19    18    1.05   0.292     1 ns          
##  5 CD19_.CD27~ Post 1~ "Pre 2nd ~    21    20   -0.626  0.532     1 ns          
##  6 CD19_.CD27~ Post 1~ "Post 2nd~    21    20   -1.02   0.308     1 ns          
##  7 CD19_.CD27~ Post 1~ "One mont~    21    18    0.425  0.671     1 ns          
##  8 CD19_.CD27~ Pre 2n~ "Post 2nd~    20    20   -0.390  0.697     1 ns          
##  9 CD19_.CD27~ Pre 2n~ "One mont~    20    18    1.02   0.307     1 ns          
## 10 CD19_.CD27~ Post 2~ "One mont~    20    18    1.40   0.161     1 ns
# Homogeneity of variance of plasmablast frequency across time points,
# subjects with prior COVID infection.
bartlett.test(
  CD19_.CD27..CD38._FreqParent ~ timeCategory,
  data = subset(subsetData, Prior.COVID.infection. == 'Yes')
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  CD19_.CD27..CD38._FreqParent by timeCategory
## Bartlett's K-squared = 12.612, df = 4, p-value = 0.01333
# Kruskal-Wallis test of plasmablast frequency across time points,
# subjects with prior COVID infection.
kruskal_test(
  data = subset(subsetData, Prior.COVID.infection. == 'Yes'),
  formula = CD19_.CD27..CD38._FreqParent ~ timeCategory
)
## # A tibble: 1 x 6
##   .y.                              n statistic    df     p method        
## * <chr>                        <int>     <dbl> <int> <dbl> <chr>         
## 1 CD19_.CD27..CD38._FreqParent    61      3.15     4 0.532 Kruskal-Wallis
# Dunn's pairwise comparisons of plasmablast frequency between time points,
# subjects with prior COVID infection.
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == 'Yes'),
  formula = CD19_.CD27..CD38._FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.        group1  group2        n1    n2 statistic      p p.adj p.adj.signif
##  * <chr>      <chr>   <chr>      <int> <int>     <dbl>  <dbl> <dbl> <chr>       
##  1 CD19_.CD2~ Baseli~ "Post 1st~     6    14    -0.147 0.883  1     ns          
##  2 CD19_.CD2~ Baseli~ "Pre 2nd ~     6    13     0.407 0.684  1     ns          
##  3 CD19_.CD2~ Baseli~ "Post 2nd~     6    14     0.542 0.588  1     ns          
##  4 CD19_.CD2~ Baseli~ "One mont~     6    14     1.15  0.249  1     ns          
##  5 CD19_.CD2~ Post 1~ "Pre 2nd ~    14    13     0.708 0.479  1     ns          
##  6 CD19_.CD2~ Post 1~ "Post 2nd~    14    14     0.889 0.374  1     ns          
##  7 CD19_.CD2~ Post 1~ "One mont~    14    14     1.68  0.0934 0.934 ns          
##  8 CD19_.CD2~ Pre 2n~ "Post 2nd~    13    14     0.165 0.869  1     ns          
##  9 CD19_.CD2~ Pre 2n~ "One mont~    13    14     0.938 0.348  1     ns          
## 10 CD19_.CD2~ Post 2~ "One mont~    14    14     0.788 0.431  1     ns
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "CD19_.CD27..CD38._FreqParent")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs3E.csv")


# Plotting subset for CD138+ plasmablasts: drop the two unused time points,
# keep only rows from the HEP tube, and fix the order of the time points.
subsetData <- subset(mergedData, timeCategory != "two Weeks" & timeCategory != "2 wks post 2nd dose")
subsetData <- subsetData[which(subsetData$Tube == "HEP"), ]
subsetData$timeCategory <- factor(
  subsetData$timeCategory,
  levels = c("Baseline", "Post 1st dose", "Pre 2nd dose", "Post 2nd dose", "One month post\n2nd dose")
)
# Scale the CD138+ frequency by the plasmablast frequency (/100), which is what
# the "% CD19" axis label below reports.
subsetData$CD19_.CD27..CD38..CD138._FreqParent <- subsetData$CD19_.CD27..CD38..CD138._FreqParent * subsetData$CD19_.CD27..CD38._FreqParent / 100
# Drop rows with a missing value. A logical mask is a no-op when nothing is
# missing; the previous is.na(sum(...)) guard followed by x[-which(...), ] would
# discard every row if the sum came out NaN without any NA element.
subsetData <- subsetData[!is.na(subsetData$CD19_.CD27..CD38..CD138._FreqParent), ]
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be reassigned.
prePostTime(
  data = subsetData,
  xData = "timeCategory",
  yData = "CD19_.CD27..CD38..CD138._FreqParent",
  fillParam = "Prior.COVID.infection.",
  groupby = "Record.ID",
  title = "CD138+ Plasmablasts",
  xLabel = " ",
  yLabel = "CD27+CD38+CD138+CD20- (% CD19)",
  repMeasures = FALSE,
  newform = TRUE
) +
  scale_y_continuous(breaks = seq(0, 2, 0.1), limits = c(0, 0.6))
## [1] "block3"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)

# ggsave(filename = "./Images/PB_deepPhenotype_bothCohorts_overTime.pdf")
# Bartlett test of equal variances across time points,
# cohort without prior COVID infection ("No").
bartlett.test(
  CD19_.CD27..CD38..CD138._FreqParent ~ timeCategory,
  data = subset(subsetData, Prior.COVID.infection. == "No")
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  CD19_.CD27..CD38..CD138._FreqParent by timeCategory
## Bartlett's K-squared = 22.162, df = 4, p-value = 0.0001861
# Kruskal-Wallis test across time points for the same cohort.
subset(subsetData, Prior.COVID.infection. == "No") %>%
  kruskal_test(CD19_.CD27..CD38..CD138._FreqParent ~ timeCategory)
## # A tibble: 1 x 6
##   .y.                                     n statistic    df      p method       
## * <chr>                               <int>     <dbl> <int>  <dbl> <chr>        
## 1 CD19_.CD27..CD38..CD138._FreqParent    98      11.2     4 0.0239 Kruskal-Wall~
# Dunn's pairwise comparisons between time points, "No" prior-infection cohort.
subset(subsetData, Prior.COVID.infection. == "No") %>%
  dunn_test(CD19_.CD27..CD38..CD138._FreqParent ~ timeCategory)
## # A tibble: 10 x 9
##    .y.        group1  group2      n1    n2 statistic       p  p.adj p.adj.signif
##  * <chr>      <chr>   <chr>    <int> <int>     <dbl>   <dbl>  <dbl> <chr>       
##  1 CD19_.CD2~ Baseli~ "Post 1~    19    21     2.31  0.0206  0.186  ns          
##  2 CD19_.CD2~ Baseli~ "Pre 2n~    19    20     0.339 0.734   1      ns          
##  3 CD19_.CD2~ Baseli~ "Post 2~    19    20     0.114 0.909   1      ns          
##  4 CD19_.CD2~ Baseli~ "One mo~    19    18    -0.869 0.385   1      ns          
##  5 CD19_.CD2~ Post 1~ "Pre 2n~    21    20    -2.00  0.0457  0.320  ns          
##  6 CD19_.CD2~ Post 1~ "Post 2~    21    20    -2.23  0.0258  0.207  ns          
##  7 CD19_.CD2~ Post 1~ "One mo~    21    18    -3.17  0.00152 0.0152 *           
##  8 CD19_.CD2~ Pre 2n~ "Post 2~    20    20    -0.228 0.820   1      ns          
##  9 CD19_.CD2~ Pre 2n~ "One mo~    20    18    -1.21  0.225   1      ns          
## 10 CD19_.CD2~ Post 2~ "One mo~    20    18    -0.993 0.321   1      ns
# Bartlett test of equal variances across time points,
# cohort with prior COVID infection ("Yes").
bartlett.test(
  CD19_.CD27..CD38..CD138._FreqParent ~ timeCategory,
  data = subset(subsetData, Prior.COVID.infection. == "Yes")
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  CD19_.CD27..CD38..CD138._FreqParent by timeCategory
## Bartlett's K-squared = 27.109, df = 4, p-value = 1.89e-05
# Kruskal-Wallis test across time points for the same cohort.
subset(subsetData, Prior.COVID.infection. == "Yes") %>%
  kruskal_test(CD19_.CD27..CD38..CD138._FreqParent ~ timeCategory)
## # A tibble: 1 x 6
##   .y.                                     n statistic    df     p method        
## * <chr>                               <int>     <dbl> <int> <dbl> <chr>         
## 1 CD19_.CD27..CD38..CD138._FreqParent    61      3.13     4 0.537 Kruskal-Wallis
# Dunn's pairwise comparisons between time points, "Yes" prior-infection cohort.
subset(subsetData, Prior.COVID.infection. == "Yes") %>%
  dunn_test(CD19_.CD27..CD38..CD138._FreqParent ~ timeCategory)
## # A tibble: 10 x 9
##    .y.         group1   group2       n1    n2 statistic     p p.adj p.adj.signif
##  * <chr>       <chr>    <chr>     <int> <int>     <dbl> <dbl> <dbl> <chr>       
##  1 CD19_.CD27~ Baseline "Post 1s~     6    14     0.305 0.760     1 ns          
##  2 CD19_.CD27~ Baseline "Pre 2nd~     6    13     0.645 0.519     1 ns          
##  3 CD19_.CD27~ Baseline "Post 2n~     6    14     0.932 0.351     1 ns          
##  4 CD19_.CD27~ Baseline "One mon~     6    14     1.43  0.154     1 ns          
##  5 CD19_.CD27~ Post 1s~ "Pre 2nd~    14    13     0.440 0.660     1 ns          
##  6 CD19_.CD27~ Post 1s~ "Post 2n~    14    14     0.809 0.418     1 ns          
##  7 CD19_.CD27~ Post 1s~ "One mon~    14    14     1.45  0.148     1 ns          
##  8 CD19_.CD27~ Pre 2nd~ "Post 2n~    13    14     0.354 0.724     1 ns          
##  9 CD19_.CD27~ Pre 2nd~ "One mon~    13    14     0.980 0.327     1 ns          
## 10 CD19_.CD27~ Post 2n~ "One mon~    14    14     0.639 0.523     1 ns
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "CD19_.CD27..CD38..CD138._FreqParent")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs3F.csv")

# ------ CD21lo B cell analyses ------

# CD21lo B cells over time, both cohorts.
# Drop the two-week time points, then keep only rows acquired in the "HEP" tube.
subsetData <- subset(mergedData, timeCategory != "two Weeks" & timeCategory != "2 wks post 2nd dose")
subsetData <- subset(subsetData, Tube == "HEP")
# Exclude samples that failed QC for CD21lo.
# Author's list: CV-028_PHI-398_oW and CV-030_HV-078_4W and CV-022_PHI-021_bL
# NOTE(review): the list above names HV-078, but the filter removes "HV-079_V1" --
# confirm which sample is intended.
subsetData <- subset(subsetData, Label != "PHI-398_V2" & Label != "PHI-021_V1" & Label != "HV-079_V1")

# Fix the order in which time points appear on the x axis.
subsetData$timeCategory <- factor(subsetData$timeCategory, levels = c("Baseline", "Post 1st dose", "Pre 2nd dose", "Post 2nd dose", "One month post\n2nd dose"))
prePostTime(data = subsetData, xData = "timeCategory", yData = "CD19_.CD21lo_FreqParent",
            fillParam = "Prior.COVID.infection.", groupby = "Record.ID",
            title = "CD21lo B cells", xLabel = " ", yLabel = "CD21lo (% CD19)",
            repMeasures = FALSE, newform = TRUE) +
  scale_y_continuous(breaks = seq(0, 20, 1))
## [1] "block3"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)

# ggsave(filename = "./Images/CD21lo_bothCohorts_overTime.pdf")

# Bartlett test of equal variances across time points,
# cohort without prior COVID infection ("No").
bartlett.test(
  CD19_.CD21lo_FreqParent ~ timeCategory,
  data = subset(subsetData, Prior.COVID.infection. == "No")
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  CD19_.CD21lo_FreqParent by timeCategory
## Bartlett's K-squared = 9.6262, df = 4, p-value = 0.04722
# Kruskal-Wallis test across time points for the same cohort.
subset(subsetData, Prior.COVID.infection. == "No") %>%
  kruskal_test(CD19_.CD21lo_FreqParent ~ timeCategory)
## # A tibble: 1 x 6
##   .y.                         n statistic    df     p method        
## * <chr>                   <int>     <dbl> <int> <dbl> <chr>         
## 1 CD19_.CD21lo_FreqParent    97      3.81     4 0.432 Kruskal-Wallis
# Dunn's pairwise comparisons between time points, "No" prior-infection cohort.
subset(subsetData, Prior.COVID.infection. == "No") %>%
  dunn_test(CD19_.CD21lo_FreqParent ~ timeCategory)
## # A tibble: 10 x 9
##    .y.       group1   group2        n1    n2 statistic      p p.adj p.adj.signif
##  * <chr>     <chr>    <chr>      <int> <int>     <dbl>  <dbl> <dbl> <chr>       
##  1 CD19_.CD~ Baseline "Post 1st~    18    21    -0.924 0.355  1     ns          
##  2 CD19_.CD~ Baseline "Pre 2nd ~    18    20     0.227 0.820  1     ns          
##  3 CD19_.CD~ Baseline "Post 2nd~    18    20     0.878 0.380  1     ns          
##  4 CD19_.CD~ Baseline "One mont~    18    18     0.417 0.676  1     ns          
##  5 CD19_.CD~ Post 1s~ "Pre 2nd ~    21    20     1.19  0.235  1     ns          
##  6 CD19_.CD~ Post 1s~ "Post 2nd~    21    20     1.86  0.0625 0.625 ns          
##  7 CD19_.CD~ Post 1s~ "One mont~    21    18     1.36  0.175  1     ns          
##  8 CD19_.CD~ Pre 2nd~ "Post 2nd~    20    20     0.669 0.504  1     ns          
##  9 CD19_.CD~ Pre 2nd~ "One mont~    20    18     0.201 0.840  1     ns          
## 10 CD19_.CD~ Post 2n~ "One mont~    20    18    -0.449 0.653  1     ns
# Bartlett test of equal variances across time points,
# cohort with prior COVID infection ("Yes").
bartlett.test(
  CD19_.CD21lo_FreqParent ~ timeCategory,
  data = subset(subsetData, Prior.COVID.infection. == "Yes")
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  CD19_.CD21lo_FreqParent by timeCategory
## Bartlett's K-squared = 2.0852, df = 4, p-value = 0.7201
# Dunn's pairwise comparisons between time points for the same cohort.
# NOTE(review): no Kruskal-Wallis test is run for this cohort before the
# pairwise comparisons -- confirm whether that omission is intentional.
subset(subsetData, Prior.COVID.infection. == "Yes") %>%
  dunn_test(CD19_.CD21lo_FreqParent ~ timeCategory)
## # A tibble: 10 x 9
##    .y.       group1   group2        n1    n2 statistic      p p.adj p.adj.signif
##  * <chr>     <chr>    <chr>      <int> <int>     <dbl>  <dbl> <dbl> <chr>       
##  1 CD19_.CD~ Baseline "Post 1st~     6    13     1.04  0.299  1     ns          
##  2 CD19_.CD~ Baseline "Pre 2nd ~     6    13     1.82  0.0694 0.694 ns          
##  3 CD19_.CD~ Baseline "Post 2nd~     6    14     1.30  0.193  1     ns          
##  4 CD19_.CD~ Baseline "One mont~     6    14     0.753 0.451  1     ns          
##  5 CD19_.CD~ Post 1s~ "Pre 2nd ~    13    13     0.977 0.329  1     ns          
##  6 CD19_.CD~ Post 1s~ "Post 2nd~    13    14     0.318 0.750  1     ns          
##  7 CD19_.CD~ Post 1s~ "One mont~    13    14    -0.377 0.706  1     ns          
##  8 CD19_.CD~ Pre 2nd~ "Post 2nd~    13    14    -0.677 0.499  1     ns          
##  9 CD19_.CD~ Pre 2nd~ "One mont~    13    14    -1.37  0.170  1     ns          
## 10 CD19_.CD~ Post 2n~ "One mont~    14    14    -0.709 0.478  1     ns
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "CD19_.CD21lo_FreqParent")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs3H.csv")

# ------ CD71+ IgD- B cell analyses ------

# IgD- CD71+ B cells over time, both cohorts.
# Drop the two-week time points and fix the x-axis order of the remaining ones.
subsetData <- subset(mergedData, timeCategory != "two Weeks" & timeCategory != "2 wks post 2nd dose")
subsetData$timeCategory <- factor(subsetData$timeCategory, levels = c("Baseline", "Post 1st dose", "Pre 2nd dose", "Post 2nd dose", "One month post\n2nd dose"))
# Keep only rows acquired in the "HEP" tube.
subsetData <- subset(subsetData, Tube == "HEP")
prePostTime(data = subsetData, xData = "timeCategory", yData = "CD19_.IgD..CD71._FreqParent",
            fillParam = "Prior.COVID.infection.", groupby = "Record.ID",
            title = "CD71+ B cells", xLabel = "", yLabel = "IgD-CD71+ (% CD19+)",
            repMeasures = FALSE, newform = TRUE)
## [1] "block3"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)

# ggsave(filename = "./Images/CD71hi_Bcells_bothCohorts_overTime.pdf")
# Bartlett test of equal variances across time points,
# cohort without prior COVID infection ("No").
bartlett.test(
  CD19_.IgD..CD71._FreqParent ~ timeCategory,
  data = subset(subsetData, Prior.COVID.infection. == "No")
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  CD19_.IgD..CD71._FreqParent by timeCategory
## Bartlett's K-squared = 17.009, df = 4, p-value = 0.001925
# Kruskal-Wallis test across time points for the same cohort.
subset(subsetData, Prior.COVID.infection. == "No") %>%
  kruskal_test(CD19_.IgD..CD71._FreqParent ~ timeCategory)
## # A tibble: 1 x 6
##   .y.                             n statistic    df     p method        
## * <chr>                       <int>     <dbl> <int> <dbl> <chr>         
## 1 CD19_.IgD..CD71._FreqParent    98      2.49     4 0.646 Kruskal-Wallis
# Dunn's pairwise comparisons between time points, "No" prior-infection cohort.
subset(subsetData, Prior.COVID.infection. == "No") %>%
  dunn_test(CD19_.IgD..CD71._FreqParent ~ timeCategory)
## # A tibble: 10 x 9
##    .y.        group1   group2        n1    n2 statistic     p p.adj p.adj.signif
##  * <chr>      <chr>    <chr>      <int> <int>     <dbl> <dbl> <dbl> <chr>       
##  1 CD19_.IgD~ Baseline "Post 1st~    19    21    0.848  0.396     1 ns          
##  2 CD19_.IgD~ Baseline "Pre 2nd ~    19    20    0.778  0.437     1 ns          
##  3 CD19_.IgD~ Baseline "Post 2nd~    19    20    1.33   0.185     1 ns          
##  4 CD19_.IgD~ Baseline "One mont~    19    18    0.0644 0.949     1 ns          
##  5 CD19_.IgD~ Post 1s~ "Pre 2nd ~    21    20   -0.0620 0.951     1 ns          
##  6 CD19_.IgD~ Post 1s~ "Post 2nd~    21    20    0.501  0.617     1 ns          
##  7 CD19_.IgD~ Post 1s~ "One mont~    21    18   -0.770  0.441     1 ns          
##  8 CD19_.IgD~ Pre 2nd~ "Post 2nd~    20    20    0.556  0.578     1 ns          
##  9 CD19_.IgD~ Pre 2nd~ "One mont~    20    18   -0.702  0.483     1 ns          
## 10 CD19_.IgD~ Post 2n~ "One mont~    20    18   -1.24   0.214     1 ns
# Bartlett test of equal variances across time points,
# cohort with prior COVID infection ("Yes").
bartlett.test(
  CD19_.IgD..CD71._FreqParent ~ timeCategory,
  data = subset(subsetData, Prior.COVID.infection. == "Yes")
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  CD19_.IgD..CD71._FreqParent by timeCategory
## Bartlett's K-squared = 7.5475, df = 4, p-value = 0.1096
# Dunn's pairwise comparisons between time points for the same cohort.
# NOTE(review): no Kruskal-Wallis test is run for this cohort before the
# pairwise comparisons -- confirm whether that omission is intentional.
subset(subsetData, Prior.COVID.infection. == "Yes") %>%
  dunn_test(CD19_.IgD..CD71._FreqParent ~ timeCategory)
## # A tibble: 10 x 9
##    .y.        group1   group2        n1    n2 statistic     p p.adj p.adj.signif
##  * <chr>      <chr>    <chr>      <int> <int>     <dbl> <dbl> <dbl> <chr>       
##  1 CD19_.IgD~ Baseline "Post 1st~     6    14    0.921  0.357     1 ns          
##  2 CD19_.IgD~ Baseline "Pre 2nd ~     6    13    1.45   0.146     1 ns          
##  3 CD19_.IgD~ Baseline "Post 2nd~     6    14    0.888  0.375     1 ns          
##  4 CD19_.IgD~ Baseline "One mont~     6    14    1.14   0.253     1 ns          
##  5 CD19_.IgD~ Post 1s~ "Pre 2nd ~    14    13    0.696  0.486     1 ns          
##  6 CD19_.IgD~ Post 1s~ "Post 2nd~    14    14   -0.0426 0.966     1 ns          
##  7 CD19_.IgD~ Post 1s~ "One mont~    14    14    0.287  0.774     1 ns          
##  8 CD19_.IgD~ Pre 2nd~ "Post 2nd~    13    14   -0.738  0.460     1 ns          
##  9 CD19_.IgD~ Pre 2nd~ "One mont~    13    14   -0.414  0.679     1 ns          
## 10 CD19_.IgD~ Post 2n~ "One mont~    14    14    0.330  0.741     1 ns
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "CD19_.IgD..CD71._FreqParent")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs3J.csv")

# ------ CXCL13 analyses ------

# Keep only the rows that have a CXCL13 measurement.
subsetData <- mergedData[!is.na(mergedData$CXCL13), ]
# Print summary statistics of CXCL13 for every time point x cohort combination.
subsetData %>%
  group_by(timeCategory, Prior.COVID.infection.) %>%
  get_summary_stats(CXCL13, type = "common") %>%
  print(n = 500)
## # A tibble: 8 x 12
##   timeCategory Prior.COVID.inf~ variable     n   min   max median   iqr  mean
##   <fct>        <chr>            <chr>    <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl>
## 1 Baseline     No               CXCL13       9  8.34  54.1   23.6  25.8  25.6
## 2 Baseline     Yes              CXCL13       9  9     37.9   24.6   5.6  23.6
## 3 Post 1st do~ No               CXCL13       9 14.7   37.9   23.9  10.6  24.6
## 4 Post 1st do~ Yes              CXCL13       9  8.74  45.1   20.6   5.9  22.3
## 5 Pre 2nd dose No               CXCL13       9 15     39.5   25.3   8.6  27.4
## 6 Pre 2nd dose Yes              CXCL13       9 11.6   29.4   22     4.7  20.7
## 7 Post 2nd do~ No               CXCL13       9 14.5   39.1   26.7  12    25.9
## 8 Post 2nd do~ Yes              CXCL13       9 12.5   63.7   22.6  12.6  25.6
## # ... with 3 more variables: sd <dbl>, se <dbl>, ci <dbl>
# Per-participant CXCL13 trajectories, coloured by prior-infection status.
# NOTE(review): this plots `mergedData`, not the NA-filtered `subsetData`
# built for the summary table -- confirm that is intended.
linePlot(
  data = mergedData, xData = "timeCategory", yData = "CXCL13",
  groupby = "Record.ID", colorby = "Prior.COVID.infection.",
  title = "Plasma CXCL13", xLabel = " ", yLabel = "CXCL13 (pg/mL)"
) +
  theme(axis.title.x = element_blank()) +
  # Replaces the colour scale already present on the plot returned by linePlot().
  scale_color_manual(name = "Prior COVID?", values = c("#FFC26A", "#B5B2F1")) +
  scale_y_continuous(limits = c(0, 140), breaks = seq(0, 140, 10))
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

# ggsave(filename = "./Images/CXCL13plasma_linePlot.pdf")

# Plasma CXCL13 in the acute COVID data set, for samples with DPO below 30.
# NOTE(review): absolute, machine-specific path -- the script will not run
# elsewhere unless this file exists at the same location.
acuteCOVID <- read.csv(file = "D:/COVID/Infection/Analysis/Rscripts/COVIDmergedData.csv")
# Constant column so that every point falls into a single x-axis category.
acuteCOVID$dummy <- "Acute COVID "
ggplot(
  data = acuteCOVID[which(acuteCOVID$DPO < 30), ],
  mapping = aes(x = dummy, y = CXCL13..pg.mL.)
) +
  ggbeeswarm::geom_quasirandom(alpha = 0.4, color = "black", size = 3) +
  ggtitle("COVID-19") +
  ylab("Plasma CXCL13 (pg/mL)") +
  theme_bw() +
  theme(
    axis.text = element_text(color = "black", size = 18),
    axis.title = element_text(size = 24),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
    plot.title = element_text(size = 28),
    axis.title.x = element_blank()
  ) +
  scale_y_continuous(breaks = seq(0, 150, 10), limits = c(0, 140))
## Warning: Removed 11 rows containing missing values (position_quasirandom).

# ggsave( filename = "./Images/CXCL13plasma_acuteCOVID.pdf", width=3)

# ------ ELISpot analyses ------

# IgG ELISpot results after the first dose, compared between the
# Prior.COVID.infection. groups for the S1, S2 and RBD antigens.
subsetData <- subset(mergedData, timeCategory == "Post 1st dose")
# Builds the pseudo-log y axis (0 to 1e4, decade breaks labelled as powers of ten)
# used by all three panels; defined once instead of being repeated per panel.
elispotScale <- function() {
  scale_y_continuous(trans = "pseudo_log", limits = c(0, 10000), breaks = 10^(0:4),
                     labels = trans_format("log10", math_format(10^.x)),
                     minor_breaks = 5 * 10^(0:10))
}
a.1 <- twoSampleBar(data = subsetData, xData = "Prior.COVID.infection.", yData = "Elispot_IgG_S1",
                    fillParam = "Prior.COVID.infection.", title = "S1",
                    yLabel = "ASC per 1e6 PBMC", nonparam = TRUE) + elispotScale()
a.2 <- twoSampleBar(data = subsetData, xData = "Prior.COVID.infection.", yData = "Elispot_IgG_S2",
                    fillParam = "Prior.COVID.infection.", title = "S2",
                    yLabel = " ", nonparam = TRUE) + elispotScale()
a.3 <- twoSampleBar(data = subsetData, xData = "Prior.COVID.infection.", yData = "Elispot_IgG_RBD",
                    fillParam = "Prior.COVID.infection.", title = "RBD",
                    yLabel = " ", nonparam = TRUE) + elispotScale()
# Show the three antigen panels side by side.
grid.arrange(a.1, a.2, a.3, nrow = 1)
## Warning: Removed 14 rows containing missing values (position_quasirandom).

## Warning: Removed 14 rows containing missing values (position_quasirandom).

## Warning: Removed 14 rows containing missing values (position_quasirandom).

# ggpubr::ggexport(plotlist = list(a.1, a.2, a.3), filename = "./Images/Elispots_IgG_post1stDose_gridarrange.pdf", nrow = 1, width = 12)

# 
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Elispot_IgG_S1")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig4C-1.csv")
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Elispot_IgG_S2")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig4C-2.csv")
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Elispot_IgG_RBD")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig4C-3.csv")


# IgA ELISpot results for the current `subsetData`, compared between the
# Prior.COVID.infection. groups for the S1, S2 and RBD antigens.
# Builds the pseudo-log y axis (0 to 1e4, decade breaks labelled as powers of ten)
# used by all three panels; defined once instead of being repeated per panel.
elispotScale <- function() {
  scale_y_continuous(trans = "pseudo_log", limits = c(0, 10000), breaks = 10^(0:4),
                     labels = trans_format("log10", math_format(10^.x)),
                     minor_breaks = 5 * 10^(0:10))
}
a.1 <- twoSampleBar(data = subsetData, xData = "Prior.COVID.infection.", yData = "Elispot_IgA_S1",
                    fillParam = "Prior.COVID.infection.", title = "S1",
                    yLabel = "ASC per 1e6 PBMC", nonparam = TRUE) + elispotScale()
a.2 <- twoSampleBar(data = subsetData, xData = "Prior.COVID.infection.", yData = "Elispot_IgA_S2",
                    fillParam = "Prior.COVID.infection.", title = "S2",
                    yLabel = " ", nonparam = TRUE) + elispotScale()
a.3 <- twoSampleBar(data = subsetData, xData = "Prior.COVID.infection.", yData = "Elispot_IgA_RBD",
                    fillParam = "Prior.COVID.infection.", title = "RBD",
                    yLabel = " ", nonparam = TRUE) + elispotScale()
# Show the three antigen panels side by side.
grid.arrange(a.1, a.2, a.3, nrow = 1)
## Warning: Removed 14 rows containing missing values (position_quasirandom).

## Warning: Removed 14 rows containing missing values (position_quasirandom).

## Warning: Removed 14 rows containing missing values (position_quasirandom).

# ggpubr::ggexport(plotlist = list(a.1, a.2, a.3), filename = "./Images/Elispots_IgA_post1stDose_gridarrange.pdf", nrow = 1, width = 12 )

# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Elispot_IgA_S1")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig4D-1.csv")
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Elispot_IgA_S2")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig4D-2.csv")
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Elispot_IgA_RBD")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig4D-3.csv")


# IgM ELISpot results for the current `subsetData`, compared between the
# Prior.COVID.infection. groups for the S1, S2 and RBD antigens.
# Builds the pseudo-log y axis (0 to 1e4, decade breaks labelled as powers of ten)
# used by all three panels; defined once instead of being repeated per panel.
elispotScale <- function() {
  scale_y_continuous(trans = "pseudo_log", limits = c(0, 10000), breaks = 10^(0:4),
                     labels = trans_format("log10", math_format(10^.x)),
                     minor_breaks = 5 * 10^(0:10))
}
a.1 <- twoSampleBar(data = subsetData, xData = "Prior.COVID.infection.", yData = "Elispot_IgM_S1",
                    fillParam = "Prior.COVID.infection.", title = "S1",
                    yLabel = "ASC per 1e6 PBMC", nonparam = TRUE) + elispotScale()
a.2 <- twoSampleBar(data = subsetData, xData = "Prior.COVID.infection.", yData = "Elispot_IgM_S2",
                    fillParam = "Prior.COVID.infection.", title = "S2",
                    yLabel = " ", nonparam = TRUE) + elispotScale()
a.3 <- twoSampleBar(data = subsetData, xData = "Prior.COVID.infection.", yData = "Elispot_IgM_RBD",
                    fillParam = "Prior.COVID.infection.", title = "RBD",
                    yLabel = " ", nonparam = TRUE) + elispotScale()
# Show the three antigen panels side by side.
grid.arrange(a.1, a.2, a.3, nrow = 1)
## Warning: Removed 14 rows containing missing values (position_quasirandom).
## Warning in f(...): The default behavior of beeswarm has changed in version
## 0.6.0. In versions <0.6.0, this plot would have been dodged on the y-axis. In
## versions >=0.6.0, grouponX=FALSE must be explicitly set to group on y-axis.
## Please set grouponX=TRUE/FALSE to avoid this warning and ensure proper axis
## choice.
## Warning: Removed 14 rows containing missing values (position_quasirandom).

## Warning: Removed 14 rows containing missing values (position_quasirandom).

# ggpubr::ggexport(plotlist = list(a.1, a.2, a.3), filename = "./Images/Elispots_IgM_post1stDose_gridarrange.pdf", nrow = 1, width = 12 )
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Elispot_IgM_S1")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs4C-1.csv")
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Elispot_IgM_S2")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs4C-2.csv")
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Elispot_IgM_RBD")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs4C-3.csv")


# IgG ELISpot results after the second dose, compared between the
# Prior.COVID.infection. groups for the S1, S2 and RBD antigens.
subsetData <- subset(mergedData, timeCategory == "Post 2nd dose")
# Builds the pseudo-log y axis (0 to 1e4, decade breaks labelled as powers of ten)
# used by all three panels; defined once instead of being repeated per panel.
elispotScale <- function() {
  scale_y_continuous(trans = "pseudo_log", limits = c(0, 10000), breaks = 10^(0:4),
                     labels = trans_format("log10", math_format(10^.x)),
                     minor_breaks = 5 * 10^(0:10))
}
a.1 <- twoSampleBar(data = subsetData, xData = "Prior.COVID.infection.", yData = "Elispot_IgG_S1",
                    fillParam = "Prior.COVID.infection.", title = "S1",
                    yLabel = "ASC per 1e6 PBMC", nonparam = TRUE) + elispotScale()
a.2 <- twoSampleBar(data = subsetData, xData = "Prior.COVID.infection.", yData = "Elispot_IgG_S2",
                    fillParam = "Prior.COVID.infection.", title = "S2",
                    yLabel = " ", nonparam = TRUE) + elispotScale()
a.3 <- twoSampleBar(data = subsetData, xData = "Prior.COVID.infection.", yData = "Elispot_IgG_RBD",
                    fillParam = "Prior.COVID.infection.", title = "RBD",
                    yLabel = " ", nonparam = TRUE) + elispotScale()
# Show the three antigen panels side by side.
grid.arrange(a.1, a.2, a.3, nrow = 1)
## Warning: Removed 20 rows containing missing values (position_quasirandom).
## Warning: Removed 20 rows containing missing values (position_quasirandom).

## Warning: Removed 20 rows containing missing values (position_quasirandom).

# ggpubr::ggexport(plotlist = list(a.1, a.2, a.3), filename = "./Images/Elispots_IgG_post2ndDose_gridarrange.pdf", nrow = 1, width = 12)
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Elispot_IgG_S1")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig4E-1.csv")
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Elispot_IgG_S2")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig4E-2.csv")
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Elispot_IgG_RBD")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig4E-3.csv")


# IgA ELISpot results for the current `subsetData`, compared between the
# Prior.COVID.infection. groups for the S1, S2 and RBD antigens.
# Builds the pseudo-log y axis (0 to 1e4, decade breaks labelled as powers of ten)
# used by all three panels; defined once instead of being repeated per panel.
elispotScale <- function() {
  scale_y_continuous(trans = "pseudo_log", limits = c(0, 10000), breaks = 10^(0:4),
                     labels = trans_format("log10", math_format(10^.x)),
                     minor_breaks = 5 * 10^(0:10))
}
a.1 <- twoSampleBar(data = subsetData, xData = "Prior.COVID.infection.", yData = "Elispot_IgA_S1",
                    fillParam = "Prior.COVID.infection.", title = "S1",
                    yLabel = "ASC per 1e6 PBMC", nonparam = TRUE) + elispotScale()
a.2 <- twoSampleBar(data = subsetData, xData = "Prior.COVID.infection.", yData = "Elispot_IgA_S2",
                    fillParam = "Prior.COVID.infection.", title = "S2",
                    yLabel = " ", nonparam = TRUE) + elispotScale()
a.3 <- twoSampleBar(data = subsetData, xData = "Prior.COVID.infection.", yData = "Elispot_IgA_RBD",
                    fillParam = "Prior.COVID.infection.", title = "RBD",
                    yLabel = " ", nonparam = TRUE) + elispotScale()
# Show the three antigen panels side by side.
grid.arrange(a.1, a.2, a.3, nrow = 1)
## Warning: Removed 21 rows containing missing values (position_quasirandom).
## Warning: Removed 21 rows containing missing values (position_quasirandom).

## Warning: Removed 21 rows containing missing values (position_quasirandom).

# ggpubr::ggexport(plotlist = list(a.1, a.2, a.3), filename = "./Images/Elispots_IgA_post2ndDose_gridarrange.pdf", nrow = 1, width = 12 )
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Elispot_IgA_S1")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig4F-1.csv")
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Elispot_IgA_S2")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig4F-2.csv")
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Elispot_IgA_RBD")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig4F-3.csv")


# IgM ELISpot results for the current `subsetData`, compared between the
# Prior.COVID.infection. groups for the S1, S2 and RBD antigens.
# Builds the pseudo-log y axis (0 to 1e4, decade breaks labelled as powers of ten)
# used by all three panels; defined once instead of being repeated per panel.
elispotScale <- function() {
  scale_y_continuous(trans = "pseudo_log", limits = c(0, 10000), breaks = 10^(0:4),
                     labels = trans_format("log10", math_format(10^.x)),
                     minor_breaks = 5 * 10^(0:10))
}
a.1 <- twoSampleBar(data = subsetData, xData = "Prior.COVID.infection.", yData = "Elispot_IgM_S1",
                    fillParam = "Prior.COVID.infection.", title = "S1",
                    yLabel = "ASC per 1e6 PBMC", nonparam = TRUE) + elispotScale()
a.2 <- twoSampleBar(data = subsetData, xData = "Prior.COVID.infection.", yData = "Elispot_IgM_S2",
                    fillParam = "Prior.COVID.infection.", title = "S2",
                    yLabel = " ", nonparam = TRUE) + elispotScale()
a.3 <- twoSampleBar(data = subsetData, xData = "Prior.COVID.infection.", yData = "Elispot_IgM_RBD",
                    fillParam = "Prior.COVID.infection.", title = "RBD",
                    yLabel = " ", nonparam = TRUE) + elispotScale()
# Show the three antigen panels side by side.
grid.arrange(a.1, a.2, a.3, nrow = 1)
## Warning: Removed 21 rows containing missing values (position_quasirandom).

## Warning: Removed 21 rows containing missing values (position_quasirandom).
## Warning in f(...): The default behavior of beeswarm has changed in version
## 0.6.0. In versions <0.6.0, this plot would have been dodged on the y-axis. In
## versions >=0.6.0, grouponX=FALSE must be explicitly set to group on y-axis.
## Please set grouponX=TRUE/FALSE to avoid this warning and ensure proper axis
## choice.
## Warning: Removed 21 rows containing missing values (position_quasirandom).

# ggpubr::ggexport(plotlist = list(a.1, a.2, a.3), filename = "./Images/Elispots_IgM_post2ndDose_gridarrange.pdf", nrow = 1, width = 12 )

# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Elispot_IgM_S1")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs4c-1post2.csv")
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Elispot_IgM_S2")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs4c-2post2.csv")
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Elispot_IgM_RBD")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs4c-3post2.csv")
# 


# S1-specific IgG ELISpot counts over time: keep rows with a measurement and
# leave out the "Pre 2nd dose" time point.
subsetData <- subset(mergedData, !is.na(Elispot_IgG_S1) & timeCategory != "Pre 2nd dose")
prePostTime(data = subsetData, xData = "timeCategory", yData = "Elispot_IgG_S1",
            fillParam = "Prior.COVID.infection.", groupby = "Record.ID",
            title = "S1", xLabel = " ", yLabel = "IgG ASC per 1e6 PBMC", exponential = TRUE) +
  # Pseudo-log y axis from 0 to 1e4 with decade breaks labelled as powers of ten.
  scale_y_continuous(trans = "pseudo_log", limits = c(0, 10000), breaks = 10^(0:4),
                     labels = trans_format("log10", math_format(10^.x)),
                     minor_breaks = 5 * 10^(0:10))

## NULL
# ggsave(filename = "./Images/Elispots_IgG_S1_prepostTime.pdf")

# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Elispot_IgG_S1")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig4G.csv")


# S2-specific IgG ELISpot counts over time: keep rows with a measurement and
# leave out the "Pre 2nd dose" time point.
subsetData <- subset(mergedData, !is.na(Elispot_IgG_S2) & timeCategory != "Pre 2nd dose")
prePostTime(data = subsetData, xData = "timeCategory", yData = "Elispot_IgG_S2",
            fillParam = "Prior.COVID.infection.", groupby = "Record.ID",
            title = "S2", xLabel = " ", yLabel = "IgG ASC per 1e6 PBMC", exponential = TRUE) +
  # Pseudo-log y axis from 0 to 1e4 with decade breaks labelled as powers of ten.
  scale_y_continuous(trans = "pseudo_log", limits = c(0, 10000), breaks = 10^(0:4),
                     labels = trans_format("log10", math_format(10^.x)),
                     minor_breaks = 5 * 10^(0:10))

## NULL
# ggsave(filename =  "./Images/Elispots_IgG_S2_prepostTime.pdf")

# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Elispot_IgG_S2")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig4H.csv")


# RBD-specific IgG ELISpot counts over time: keep rows with a measurement and
# leave out the "Pre 2nd dose" time point.
subsetData <- subset(mergedData, !is.na(Elispot_IgG_RBD) & timeCategory != "Pre 2nd dose")
prePostTime(data = subsetData, xData = "timeCategory", yData = "Elispot_IgG_RBD",
            fillParam = "Prior.COVID.infection.", groupby = "Record.ID",
            title = "RBD", xLabel = " ", yLabel = "IgG ASC per 1e6 PBMC", exponential = TRUE) +
  # Pseudo-log y axis from 0 to 1e4 with decade breaks labelled as powers of ten.
  scale_y_continuous(trans = "pseudo_log", limits = c(0, 10000), breaks = 10^(0:4),
                     labels = trans_format("log10", math_format(10^.x)),
                     minor_breaks = 5 * 10^(0:10))

## NULL
# ggsave(filename = "./Images/Elispots_IgG_RBD_prepostTime.pdf")

# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Elispot_IgG_RBD")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig4I.csv")


# "Post 2nd dose" IgG ELISpots: Elispot_IgG_RBD (x) against Elispot_IgG_S1 (y).
# data1 holds the rows with Prior.COVID.infection. == "No" (labelled "Naive"),
# data2 the rows with "Yes" (labelled "Experienced").
bivScatter(
  data1 = subset(mergedData, Prior.COVID.infection. == "No" & timeCategory == "Post 2nd dose"),
  data2 = subset(mergedData, Prior.COVID.infection. == "Yes" & timeCategory == "Post 2nd dose"),
  name1 = "Naive", name2 = "Experienced",
  xData = "Elispot_IgG_RBD", yData = "Elispot_IgG_S1",
  fillParam = "Prior.COVID.infection.",
  title = "IgG ELISpots post 2nd dose",
  xLabel = "RBD ASC per 10^6 PBMC", yLabel = "S1 ASC per 10^6 PBMC",
  statsOff = FALSE  # spelled out: `F` is an ordinary variable that can be reassigned
) +
  # both axes pseudo-log with power-of-10 tick labels; y reaches 3e4, x reaches 1e4
  scale_y_continuous(
    trans = "pseudo_log", limits = c(0, 3e4), breaks = 10^(0:4),
    labels = trans_format("log10", math_format(10^.x)),
    minor_breaks = 5 * 10^(0:10)
  ) +
  scale_x_continuous(
    trans = "pseudo_log", limits = c(0, 1e4), breaks = 10^(0:4),
    labels = trans_format("log10", math_format(10^.x)),
    minor_breaks = 5 * 10^(0:10)
  )
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 7 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_smooth).
## Warning: Removed 4 rows containing missing values (geom_smooth).

# ggsave(filename = "./Images/Elispots_IgG_S1-vs-RBD_correl_biv.pdf", width=8)

# "Post 2nd dose" IgG ELISpots: Elispot_IgG_RBD (x) against Elispot_IgG_S2 (y).
# data1 holds the rows with Prior.COVID.infection. == "No" (labelled "Naive"),
# data2 the rows with "Yes" (labelled "Experienced").
bivScatter(
  data1 = subset(mergedData, Prior.COVID.infection. == "No" & timeCategory == "Post 2nd dose"),
  data2 = subset(mergedData, Prior.COVID.infection. == "Yes" & timeCategory == "Post 2nd dose"),
  name1 = "Naive", name2 = "Experienced",
  xData = "Elispot_IgG_RBD", yData = "Elispot_IgG_S2",
  fillParam = "Prior.COVID.infection.",
  title = "IgG ELISpots post 2nd dose",
  xLabel = "RBD ASC per 10^6 PBMC", yLabel = "S2 ASC per 10^6 PBMC",
  statsOff = FALSE  # spelled out: `F` is an ordinary variable that can be reassigned
) +
  # both axes pseudo-log, limited to 0..1e3, with power-of-10 tick labels
  scale_y_continuous(
    trans = "pseudo_log", limits = c(0, 1e3), breaks = 10^(0:4),
    labels = trans_format("log10", math_format(10^.x)),
    minor_breaks = 5 * 10^(0:10)
  ) +
  scale_x_continuous(
    trans = "pseudo_log", limits = c(0, 1e3), breaks = 10^(0:4),
    labels = trans_format("log10", math_format(10^.x)),
    minor_breaks = 5 * 10^(0:10)
  )
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 7 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_smooth).
## Warning: Removed 18 rows containing missing values (geom_smooth).
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

# ggsave(filename = "./Images/Elispots_IgG_S2-vs-RBD_correl_biv.pdf", width = 8 )

# ----------------- DPV analysis post 1st vaccination ------------------------

# Elispot_IgG_S1 against DPV for the "Post 1st dose" rows drawn from tube "HEP".
# Selecting a single time category already excludes "two Weeks" and
# "2 wks post 2nd dose", so the former pre-filter on those values was redundant.
subsetData <- subset(mergedData, timeCategory == "Post 1st dose")
subsetData <- subsetData[which(subsetData$Tube == "HEP"), ]
# subsetData <- subset(subsetData, Record.ID != "CV-011" & Record.ID != "CV-012" & Record.ID != "CV-005")        # absence of Ki67 stain
subsetData$timeCategory <- factor(
  subsetData$timeCategory,
  levels = c("Baseline", "Post 1st dose", "Pre 2nd dose", "Post 2nd dose", "One month post\n2nd dose")
)
prePostTime(
  subsetData, xData = "DPV", yData = "Elispot_IgG_S1",
  fillParam = "Prior.COVID.infection.", groupby = "Record.ID",
  title = "anti-S1 IgG ASC", xLabel = "Days", yLabel = "Spots per 10^6 PBMC",
  # TRUE/FALSE spelled out: `T` and `F` are ordinary variables that can be reassigned
  repMeasures = FALSE, exponential = FALSE, pathOff = TRUE
) +
  scale_y_continuous(
    trans = "pseudo_log", limits = c(0, 1e3), breaks = 10^(0:4),
    labels = trans_format("log10", math_format(10^.x)),
    minor_breaks = 5 * 10^(0:10)
  ) +
  # zoom the x axis to days -1..15 and mark day 0 with a dashed line
  coord_cartesian(xlim = c(-1, 15)) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.5)
## [1] "block2"
## Warning: Removed 15 rows containing missing values (geom_point).

# ggsave(filename = "./Images/Elispots_IgG-S1_vs_DPV.pdf")

# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Elispot_IgG_S1")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs4D-1.csv")


# Elispot_IgG_S2 against DPV, using the subsetData assigned earlier in the script.
prePostTime(
  subsetData, xData = "DPV", yData = "Elispot_IgG_S2",
  fillParam = "Prior.COVID.infection.", groupby = "Record.ID",
  title = "anti-S2 IgG ASC", xLabel = "Days", yLabel = "Spots per 10^6 PBMC",
  # TRUE/FALSE spelled out: `T` and `F` are ordinary variables that can be reassigned
  repMeasures = FALSE, exponential = FALSE, pathOff = TRUE
) +
  scale_y_continuous(
    trans = "pseudo_log", limits = c(0, 1e3), breaks = 10^(0:4),
    labels = trans_format("log10", math_format(10^.x)),
    minor_breaks = 5 * 10^(0:10)
  ) +
  # zoom the x axis to days -1..15 and mark day 0 with a dashed line
  coord_cartesian(xlim = c(-1, 15)) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.5)
## [1] "block2"
## Warning: Removed 15 rows containing missing values (geom_point).

# ggsave(filename = "./Images/Elispots_IgG-S2_vs_DPV.pdf")

# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Elispot_IgG_S2")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs4D-2.csv")

# Elispot_IgG_RBD against DPV, using the subsetData assigned earlier in the script.
prePostTime(
  subsetData, xData = "DPV", yData = "Elispot_IgG_RBD",
  fillParam = "Prior.COVID.infection.", groupby = "Record.ID",
  title = "anti-RBD IgG ASC", xLabel = "Days", yLabel = "Spots per 10^6 PBMC",
  # TRUE/FALSE spelled out: `T` and `F` are ordinary variables that can be reassigned
  repMeasures = FALSE, exponential = FALSE, pathOff = TRUE
) +
  scale_y_continuous(
    trans = "pseudo_log", limits = c(0, 1e3), breaks = 10^(0:4),
    labels = trans_format("log10", math_format(10^.x)),
    minor_breaks = 5 * 10^(0:10)
  ) +
  # zoom the x axis to days -1..15 and mark day 0 with a dashed line
  coord_cartesian(xlim = c(-1, 15)) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.5)
## [1] "block2"
## Warning: Removed 14 rows containing missing values (geom_point).

# ggsave(filename = "./Images/Elispots_IgG-RBD_vs_DPV.pdf")

# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Elispot_IgG_RBD")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs4D-3.csv")

# ----------------- post 2nd vaccination ------------------------

# Elispot_IgG_S1 against DPV for the "Post 2nd dose" rows.
# Selecting a single time category already excludes "two Weeks" and
# "2 wks post 2nd dose", so the former pre-filter on those values was redundant.
subsetData <- subset(mergedData, timeCategory == "Post 2nd dose")
# Shift DPV by the number of days between Vaccine.1.date and Vaccine.2.date
# (row-wise, so filtering before or after the shift gives the same values).
subsetData$DPV <- subsetData$DPV -
  as.numeric(difftime(subsetData$Vaccine.2.date, subsetData$Vaccine.1.date, units = "days"))
prePostTime(
  subsetData, xData = "DPV", yData = "Elispot_IgG_S1",
  fillParam = "Prior.COVID.infection.", groupby = "Record.ID",
  title = "anti-S1 IgG ASC", xLabel = "Days", yLabel = "Spots per 10^6 PBMC",
  # TRUE/FALSE spelled out: `T` and `F` are ordinary variables that can be reassigned
  repMeasures = FALSE, exponential = FALSE, pathOff = TRUE
) +
  scale_y_continuous(
    trans = "pseudo_log", limits = c(0, 1e3), breaks = 10^(0:4),
    labels = trans_format("log10", math_format(10^.x)),
    minor_breaks = 5 * 10^(0:10)
  ) +
  # zoom the x axis to days -5..15 and mark day 0 with a dashed line
  coord_cartesian(xlim = c(-5, 15)) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.5)
## [1] "block2"
## Warning: Removed 20 rows containing missing values (geom_point).

# ggsave(filename = "./Images/Elispots_IgG-S1_vs_DPV_vax2.pdf")

# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Elispot_IgG_S1")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs4D-4.csv")


# Elispot_IgG_S2 against DPV, using the subsetData assigned earlier in the script.
prePostTime(
  subsetData, xData = "DPV", yData = "Elispot_IgG_S2",
  fillParam = "Prior.COVID.infection.", groupby = "Record.ID",
  title = "anti-S2 IgG ASC", xLabel = "Days", yLabel = "Spots per 10^6 PBMC",
  # TRUE/FALSE spelled out: `T` and `F` are ordinary variables that can be reassigned
  repMeasures = FALSE, exponential = FALSE, pathOff = TRUE
) +
  scale_y_continuous(
    trans = "pseudo_log", limits = c(0, 1e3), breaks = 10^(0:4),
    labels = trans_format("log10", math_format(10^.x)),
    minor_breaks = 5 * 10^(0:10)
  ) +
  # zoom the x axis to days -5..15 and mark day 0 with a dashed line
  coord_cartesian(xlim = c(-5, 15)) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.5)
## [1] "block2"
## Warning: Removed 20 rows containing missing values (geom_point).

# ggsave(filename = "./Images/Elispots_IgG-S2_vs_DPV_vax2.pdf")

# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Elispot_IgG_S2")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs4D-5.csv")


# Elispot_IgG_RBD against DPV, using the subsetData assigned earlier in the script.
prePostTime(
  subsetData, xData = "DPV", yData = "Elispot_IgG_RBD",
  fillParam = "Prior.COVID.infection.", groupby = "Record.ID",
  title = "anti-RBD IgG ASC", xLabel = "Days", yLabel = "Spots per 10^6 PBMC",
  # TRUE/FALSE spelled out: `T` and `F` are ordinary variables that can be reassigned
  repMeasures = FALSE, exponential = FALSE, pathOff = TRUE
) +
  scale_y_continuous(
    trans = "pseudo_log", limits = c(0, 1e3), breaks = 10^(0:4),
    labels = trans_format("log10", math_format(10^.x)),
    minor_breaks = 5 * 10^(0:10)
  ) +
  # zoom the x axis to days -5..15 and mark day 0 with a dashed line
  coord_cartesian(xlim = c(-5, 15)) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.5)
## [1] "block2"
## Warning: Removed 20 rows containing missing values (geom_point).

# ggsave(filename = "./Images/Elispots_IgG-RBD_vs_DPV_vax2.pdf")

# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Elispot_IgG_RBD")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs4D-6.csv")



# Pairwise Kendall correlations (and their p-values) between the Elispot*
# columns for rows with Prior.COVID.infection. == "No" at "Post 1st dose".
subsetData <- subset(mergedData, Prior.COVID.infection. == "No" & timeCategory == "Post 1st dose")
subsetData <- subsetData[, grep("^Elispot", names(subsetData)), drop = FALSE]
# Exclude the Elispot_IgA_S1 column with a logical mask. A negative grep()
# index would select zero columns if the pattern ever matched nothing.
subsetData <- subsetData[, !grepl("Elispot_IgA_S1", names(subsetData)), drop = FALSE]
# Relabel columns from "Elispot_<A>_<B>" to "<A> anti-<B>",
# e.g. "Elispot_IgG_S1" becomes "IgG anti-S1".
temp <- do.call(rbind.data.frame, strsplit(names(subsetData), split = "_"))
temp <- paste0(temp[, 2], " anti-", temp[, 3])
names(subsetData) <- temp
cor.elispots <- cor(subsetData, method = "kendall", use = "pairwise.complete.obs")
# The ggcorrplot:: prefix matters here: rstatix masks cor_pmat once attached.
cor.elispots.pmat <- ggcorrplot::cor_pmat(subsetData, method = "kendall", use = "pairwise.complete.obs")
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
# Correlogram of cor.elispots drawn with insig = "blank".
ggcorrplot::ggcorrplot(
  corr = cor.elispots,
  p.mat = cor.elispots.pmat,
  title = "ELISpots post 1st dose",
  legend.title = "Kendall tau",
  tl.cex = 18,
  pch.cex = 1,
  insig = "blank"
)

# ggsave(filename = "./Images/Elispots_ggcorrplot_Naive_post1st.pdf")
# Same correlogram without the insig = "blank" argument (package default used).
ggcorrplot::ggcorrplot(
  corr = cor.elispots,
  p.mat = cor.elispots.pmat,
  title = "ELISpots post 1st dose",
  legend.title = "Kendall tau",
  tl.cex = 18,
  pch.cex = 1
)

# ggsave(filename = "./Images/Elispots_ggcorrplot_Naive_post1st_full.pdf")

# Kendall correlation matrix and p-values across all Elispot* columns for
# rows with Prior.COVID.infection. == "Yes" at "Post 1st dose".
subsetData <- subset(mergedData, timeCategory == "Post 1st dose" & Prior.COVID.infection. == "Yes")
subsetData <- subsetData[, grepl("^Elispot", names(subsetData))]
# Column labels: "Elispot_<A>_<B>" becomes "<A> anti-<B>".
temp <- do.call(rbind.data.frame, strsplit(names(subsetData), split = "_"))
temp <- paste0(temp[[2]], " anti-", temp[[3]])
names(subsetData) <- temp
cor.elispots <- cor(x = subsetData, use = "pairwise.complete.obs", method = "kendall")
cor.elispots.pmat <- ggcorrplot::cor_pmat(
  subsetData,
  method = "kendall",
  use = "pairwise.complete.obs"
)
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
# Correlogram of cor.elispots drawn with insig = "blank".
ggcorrplot::ggcorrplot(
  corr = cor.elispots,
  p.mat = cor.elispots.pmat,
  title = "ELISpots post 1st dose",
  legend.title = "Kendall tau",
  tl.cex = 18,
  pch.cex = 1,
  insig = "blank"
)

# ggsave(filename = "./Images/Elispots_ggcorrplot_Experienced_post1st.pdf")
# Same correlogram without the insig = "blank" argument (package default used).
ggcorrplot::ggcorrplot(
  corr = cor.elispots,
  p.mat = cor.elispots.pmat,
  title = "ELISpots post 1st dose",
  legend.title = "Kendall tau",
  tl.cex = 18,
  pch.cex = 1
)

# ggsave(filename = "./Images/Elispots_ggcorrplot_Experienced_post1st_full.pdf")




# Kendall correlation matrix and p-values across all Elispot* columns for
# rows with Prior.COVID.infection. == "No" at "Post 2nd dose".
subsetData <- subset(mergedData, timeCategory == "Post 2nd dose" & Prior.COVID.infection. == "No")
subsetData <- subsetData[, grepl("^Elispot", names(subsetData))]
# Column labels: "Elispot_<A>_<B>" becomes "<A> anti-<B>".
temp <- do.call(rbind.data.frame, strsplit(names(subsetData), split = "_"))
temp <- paste0(temp[[2]], " anti-", temp[[3]])
names(subsetData) <- temp
cor.elispots <- cor(x = subsetData, use = "pairwise.complete.obs", method = "kendall")
cor.elispots.pmat <- ggcorrplot::cor_pmat(
  subsetData,
  method = "kendall",
  use = "pairwise.complete.obs"
)
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
# Correlogram of cor.elispots drawn with insig = "blank".
ggcorrplot::ggcorrplot(
  corr = cor.elispots,
  p.mat = cor.elispots.pmat,
  title = "ELISpots post 2nd dose",
  legend.title = "Kendall tau",
  tl.cex = 18,
  pch.cex = 1,
  insig = "blank"
)

# ggsave(filename = "./Images/Elispots_ggcorrplot_Naive_post2nd.pdf")
# Same correlogram without the insig = "blank" argument (package default used).
ggcorrplot::ggcorrplot(
  corr = cor.elispots,
  p.mat = cor.elispots.pmat,
  title = "ELISpots post 2nd dose",
  legend.title = "Kendall tau",
  tl.cex = 18,
  pch.cex = 1
)

# ggsave(filename = "./Images/Elispots_ggcorrplot_Naive_post2nd_full.pdf")

# Kendall correlation matrix and p-values across all Elispot* columns for
# rows with Prior.COVID.infection. == "Yes" at "Post 2nd dose".
subsetData <- subset(mergedData, timeCategory == "Post 2nd dose" & Prior.COVID.infection. == "Yes")
subsetData <- subsetData[, grepl("^Elispot", names(subsetData))]
# Column labels: "Elispot_<A>_<B>" becomes "<A> anti-<B>".
temp <- do.call(rbind.data.frame, strsplit(names(subsetData), split = "_"))
temp <- paste0(temp[[2]], " anti-", temp[[3]])
names(subsetData) <- temp
cor.elispots <- cor(x = subsetData, use = "pairwise.complete.obs", method = "kendall")
cor.elispots.pmat <- ggcorrplot::cor_pmat(
  subsetData,
  method = "kendall",
  use = "pairwise.complete.obs"
)
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
# Correlogram of cor.elispots drawn with insig = "blank".
ggcorrplot::ggcorrplot(
  corr = cor.elispots,
  p.mat = cor.elispots.pmat,
  title = "ELISpots post 2nd dose",
  legend.title = "Kendall tau",
  tl.cex = 18,
  pch.cex = 1,
  insig = "blank"
)

# ggsave(filename = "./Images/Elispots_ggcorrplot_Experienced_post2nd.pdf")
# Same correlogram without the insig = "blank" argument (package default used).
ggcorrplot::ggcorrplot(
  corr = cor.elispots,
  p.mat = cor.elispots.pmat,
  title = "ELISpots post 2nd dose",
  legend.title = "Kendall tau",
  tl.cex = 18,
  pch.cex = 1
)

# ggsave(filename = "./Images/Elispots_ggcorrplot_Experienced_post2nd_full.pdf")

# ------------------ B tetramer analyses --------------------------

# Btet_RBD_FreqParent over time, excluding the "two Weeks" and
# "2 wks post 2nd dose" categories.
subsetData <- subset(mergedData, timeCategory != "two Weeks" & timeCategory != "2 wks post 2nd dose")

prePostTime(
  subsetData, xData = "timeCategory", yData = "Btet_RBD_FreqParent",
  fillParam = "Prior.COVID.infection.", groupby = "Record.ID",
  title = "RBD-reactive B cells", xLabel = " ", yLabel = "RBD+ (% CD19)",
  # TRUE/FALSE spelled out: `T` and `F` are ordinary variables that can be reassigned
  repMeasures = FALSE, exponential = FALSE, newform = TRUE
) +
  # log10 y axis restricted to 0.02..5
  scale_y_continuous(trans = "log10", limits = c(0.02, 5))
## [1] "block3"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)
## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 79 row(s) containing missing values (geom_path).
## Warning: Removed 82 rows containing missing values (geom_point).

# ggsave(filename = "./Images/Btet_frequency_overTime.pdf")

# Btet_RBD_FreqParent by timeCategory, rows with Prior.COVID.infection. == "No":
# Bartlett test, then Kruskal-Wallis, then Dunn pairwise comparisons.
bartlett.test(
  Btet_RBD_FreqParent ~ timeCategory,
  data = subset(subsetData, Prior.COVID.infection. == "No")
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Btet_RBD_FreqParent by timeCategory
## Bartlett's K-squared = 13.528, df = 4, p-value = 0.008964
kruskal_test(
  data = subset(subsetData, Prior.COVID.infection. == "No"),
  formula = Btet_RBD_FreqParent ~ timeCategory
)
## # A tibble: 1 x 6
##   .y.                     n statistic    df      p method        
## * <chr>               <int>     <dbl> <int>  <dbl> <chr>         
## 1 Btet_RBD_FreqParent   102      12.1     4 0.0167 Kruskal-Wallis
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == "No"),
  formula = Btet_RBD_FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.      group1   group2       n1    n2 statistic       p  p.adj p.adj.signif
##  * <chr>    <chr>    <chr>     <int> <int>     <dbl>   <dbl>  <dbl> <chr>       
##  1 Btet_RB~ Baseline "Post 1s~    11    11     0.280 0.780   1      ns          
##  2 Btet_RB~ Baseline "Pre 2nd~    11    11     0.599 0.549   1      ns          
##  3 Btet_RB~ Baseline "Post 2n~    11    11     2.81  0.00496 0.0496 *           
##  4 Btet_RB~ Baseline "One mon~    11    11     2.10  0.0354  0.248  ns          
##  5 Btet_RB~ Post 1s~ "Pre 2nd~    11    11     0.320 0.749   1      ns          
##  6 Btet_RB~ Post 1s~ "Post 2n~    11    11     2.53  0.0114  0.103  ns          
##  7 Btet_RB~ Post 1s~ "One mon~    11    11     1.82  0.0681  0.409  ns          
##  8 Btet_RB~ Pre 2nd~ "Post 2n~    11    11     2.21  0.0271  0.217  ns          
##  9 Btet_RB~ Pre 2nd~ "One mon~    11    11     1.50  0.132   0.662  ns          
## 10 Btet_RB~ Post 2n~ "One mon~    11    11    -0.706 0.480   1      ns
# Btet_RBD_FreqParent by timeCategory, rows with Prior.COVID.infection. == "Yes":
# Bartlett test, then Kruskal-Wallis, then Dunn pairwise comparisons.
bartlett.test(
  Btet_RBD_FreqParent ~ timeCategory,
  data = subset(subsetData, Prior.COVID.infection. == "Yes")
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Btet_RBD_FreqParent by timeCategory
## Bartlett's K-squared = 38.181, df = 4, p-value = 1.028e-07
kruskal_test(
  data = subset(subsetData, Prior.COVID.infection. == "Yes"),
  formula = Btet_RBD_FreqParent ~ timeCategory
)
## # A tibble: 1 x 6
##   .y.                     n statistic    df      p method        
## * <chr>               <int>     <dbl> <int>  <dbl> <chr>         
## 1 Btet_RBD_FreqParent    74      12.7     4 0.0126 Kruskal-Wallis
dunn_test(
  data = subset(subsetData, Prior.COVID.infection. == "Yes"),
  formula = Btet_RBD_FreqParent ~ timeCategory
)
## # A tibble: 10 x 9
##    .y.      group1   group2       n1    n2 statistic       p  p.adj p.adj.signif
##  * <chr>    <chr>    <chr>     <int> <int>     <dbl>   <dbl>  <dbl> <chr>       
##  1 Btet_RB~ Baseline "Post 1s~     9     9     1.24  0.215   1      ns          
##  2 Btet_RB~ Baseline "Pre 2nd~     9     8     2.56  0.0105  0.0944 ns          
##  3 Btet_RB~ Baseline "Post 2n~     9     9     3.19  0.00144 0.0144 *           
##  4 Btet_RB~ Baseline "One mon~     9     9     2.34  0.0192  0.154  ns          
##  5 Btet_RB~ Post 1s~ "Pre 2nd~     9     8     1.36  0.175   1      ns          
##  6 Btet_RB~ Post 1s~ "Post 2n~     9     9     1.95  0.0516  0.361  ns          
##  7 Btet_RB~ Post 1s~ "One mon~     9     9     1.10  0.271   1      ns          
##  8 Btet_RB~ Pre 2nd~ "Post 2n~     8     9     0.531 0.595   1      ns          
##  9 Btet_RB~ Pre 2nd~ "One mon~     8     9    -0.288 0.773   1      ns          
## 10 Btet_RB~ Post 2n~ "One mon~     9     9    -0.845 0.398   1      ns
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Btet_RBD_FreqParent")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig5B.csv")


# Btet_RBD_FreqParent at "Baseline", split by Prior.COVID.infection.
subsetData <- subset(mergedData, timeCategory == "Baseline")
twoSampleBar(
  data = subsetData, xData = "Prior.COVID.infection.", yData = "Btet_RBD_FreqParent",
  fillParam = "Prior.COVID.infection.", title = "Baseline",
  yLabel = "RBD+ (% CD19)",
  nonparam = TRUE  # spelled out: `T` is an ordinary variable that can be reassigned
)
## Warning: Removed 16 rows containing missing values (position_quasirandom).

# ggsave(filename = "./Images/Btet_frequency_baseline.pdf", width=5)

# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Btet_RBD_FreqParent")]
# write.csv(x = out, file = "plottedData/figs5B.csv")


# Same Baseline-only subsetData as the preceding plot, now showing
# Btet_RBDhiIgDloIgGhi split by prior COVID infection status.
# NOTE(review): `nonparam` presumably selects a non-parametric comparison
# inside twoSampleBar() -- confirm in the plotting helpers.
twoSampleBar(
  data = subsetData,
  xData = "Prior.COVID.infection.",
  yData = "Btet_RBDhiIgDloIgGhi",
  fillParam = "Prior.COVID.infection.",
  title = "Baseline",
  yLabel = "IgG+ IgD- (% RBD)",
  nonparam = TRUE  # TRUE rather than T: T is an ordinary, reassignable variable
)
## Warning: Removed 16 rows containing missing values (position_quasirandom).

# ggsave(filename = "./Images/Btet_IgGhi_baseline.pdf", width=5)

# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Btet_RBDhiIgDloIgGhi")]
# write.csv(x = out, file = "plottedData/figs5C.csv")



# Fold change in Btet_RBD_FreqParent from Baseline to Post 2nd dose, per
# participant.
subsetData <- subset(
  mergedData,
  timeCategory == "Baseline" | timeCategory == "Post 2nd dose"
)

# Cast to wide format: one row per Record.ID / Prior.COVID.infection., one
# column per time category.
FC_response2 <- dcast(
  subsetData,
  Record.ID + Prior.COVID.infection. ~ timeCategory,
  value.var = "Btet_RBD_FreqParent"
)
FC_response2$FoldChange <- FC_response2$`Post 2nd dose` / FC_response2$Baseline

# Drop participants whose Baseline value is 0 (fold change of +/-Inf).
# NOTE(review): is.infinite() is FALSE for NA/NaN, so participants missing
# either time point are retained with an NA fold change; they appear to be
# counted in the n1/n2 reported by wilcox_test() -- confirm this is intended.
FC_response2 <- FC_response2[!is.infinite(FC_response2$FoldChange), ]

FC_response2 %>%
  group_by(Prior.COVID.infection.) %>%
  get_summary_stats(type = "common")
## # A tibble: 6 x 11
##   Prior.COVID.inf~ variable     n   min   max median   iqr  mean    sd    se
##   <chr>            <chr>    <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 No               Baseline     9 0.014  0.19  0.067 0.054 0.072 0.052 0.017
## 2 No               FoldCha~     9 0.447 13.6   2.88  2.51  4.02  4.27  1.42 
## 3 No               Post 2n~     9 0.062  0.35  0.12  0.149 0.167 0.101 0.034
## 4 Yes              Baseline     9 0.039  0.24  0.12  0.064 0.119 0.065 0.022
## 5 Yes              FoldCha~     9 0.786 10.2   4.83  4.5   4.65  3.05  1.02 
## 6 Yes              Post 2n~     9 0.11   2.44  0.26  0.21  0.593 0.742 0.247
## # ... with 1 more variable: ci <dbl>
# Two-sample Wilcoxon test of FoldChange between the Prior.COVID.infection.
# groups.
wilcox_test(
  data = FC_response2,
  formula = FoldChange ~ Prior.COVID.infection.
)
## # A tibble: 1 x 7
##   .y.        group1 group2    n1    n2 statistic     p
## * <chr>      <chr>  <chr>  <int> <int>     <dbl> <dbl>
## 1 FoldChange No     Yes       19    15        31 0.436
# Drop the "two Weeks" and "2 wks post 2nd dose" time categories, then plot
# Btet_RBD_FreqParent over the remaining time categories on a log10 y-axis.
# NOTE(review): prePostTime() is presumably defined in
# COVIDvaccines_PlottingFunctions.R; the meaning of repMeasures, exponential,
# newform and recentCOVID should be confirmed there.
subsetData <- subset(
  mergedData,
  timeCategory != "two Weeks" & timeCategory != "2 wks post 2nd dose"
)
prePostTime(
  subsetData,
  xData = "timeCategory",
  yData = "Btet_RBD_FreqParent",
  fillParam = "Prior.COVID.infection.",
  groupby = "Record.ID",
  title = "RBD-reactive B cells",
  xLabel = " ",
  yLabel = "RBD+ (% CD19)",
  repMeasures = FALSE,
  exponential = FALSE,
  newform = TRUE,
  recentCOVID = TRUE
) +
  scale_y_continuous(trans = "log10", limits = c(0.02, 5))
## [1] "block3.1 recentCOVID"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 79 row(s) containing missing values (geom_path).
## Warning: Removed 82 rows containing missing values (geom_point).

# ggsave(filename = "./Images/Btet_frequency_overTime_recentCOVID.pdf")



# Btet_RBDhiIgDloIgGhi over time categories, y-axis fixed to 0-125 with
# breaks every 25.
# NOTE(review): the meaning of repMeasures, exponential and newform is set
# inside prePostTime() -- confirm in the plotting helpers.
prePostTime(
  subsetData,
  xData = "timeCategory",
  yData = "Btet_RBDhiIgDloIgGhi",
  fillParam = "Prior.COVID.infection.",
  groupby = "Record.ID",
  title = "Switched RBD+ B cells",
  xLabel = " ",
  yLabel = "IgG+ IgD- (% RBD+)",
  repMeasures = FALSE,
  exponential = FALSE,
  newform = TRUE
) +
  scale_y_continuous(breaks = seq(0, 100, 25), limits = c(0, 125))
## [1] "block3"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)
## Warning: Removed 77 row(s) containing missing values (geom_path).
## Warning: Removed 77 rows containing missing values (geom_point).

# ggsave(filename = "./Images/Btet_IgGhi_overTime.pdf")

# Bartlett test of homogeneity of variances for Btet_RBDhiIgDloIgGhi across
# timeCategory, rows where Prior.COVID.infection. is "No".
subsetData %>%
  subset(Prior.COVID.infection. == "No") %>%
  bartlett.test(Btet_RBDhiIgDloIgGhi ~ timeCategory, data = .)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Btet_RBDhiIgDloIgGhi by timeCategory
## Bartlett's K-squared = 57.985, df = 4, p-value = 7.688e-12
# Kruskal-Wallis test of Btet_RBDhiIgDloIgGhi across timeCategory, rows where
# Prior.COVID.infection. is "No".
subsetData %>%
  subset(Prior.COVID.infection. == "No") %>%
  kruskal_test(Btet_RBDhiIgDloIgGhi ~ timeCategory)
## # A tibble: 1 x 6
##   .y.                      n statistic    df        p method        
## * <chr>                <int>     <dbl> <int>    <dbl> <chr>         
## 1 Btet_RBDhiIgDloIgGhi   102      23.5     4 0.000101 Kruskal-Wallis
# Pairwise Dunn comparisons of Btet_RBDhiIgDloIgGhi between time categories,
# rows where Prior.COVID.infection. is "No".
subsetData %>%
  subset(Prior.COVID.infection. == "No") %>%
  dunn_test(Btet_RBDhiIgDloIgGhi ~ timeCategory)
## # A tibble: 10 x 9
##    .y.      group1  group2       n1    n2 statistic       p   p.adj p.adj.signif
##  * <chr>    <chr>   <chr>     <int> <int>     <dbl>   <dbl>   <dbl> <chr>       
##  1 Btet_RB~ Baseli~ "Post 1s~    11    11    -1.06  2.88e-1 8.64e-1 ns          
##  2 Btet_RB~ Baseli~ "Pre 2nd~    11    11     0.643 5.20e-1 8.64e-1 ns          
##  3 Btet_RB~ Baseli~ "Post 2n~    11    11     3.17  1.50e-3 1.20e-2 *           
##  4 Btet_RB~ Baseli~ "One mon~    11    11     2.28  2.26e-2 1.36e-1 ns          
##  5 Btet_RB~ Post 1~ "Pre 2nd~    11    11     1.71  8.80e-2 4.40e-1 ns          
##  6 Btet_RB~ Post 1~ "Post 2n~    11    11     4.24  2.26e-5 2.26e-4 ***         
##  7 Btet_RB~ Post 1~ "One mon~    11    11     3.34  8.31e-4 7.48e-3 **          
##  8 Btet_RB~ Pre 2n~ "Post 2n~    11    11     2.53  1.14e-2 7.96e-2 ns          
##  9 Btet_RB~ Pre 2n~ "One mon~    11    11     1.64  1.02e-1 4.40e-1 ns          
## 10 Btet_RB~ Post 2~ "One mon~    11    11    -0.895 3.71e-1 8.64e-1 ns
# Bartlett test of homogeneity of variances for Btet_RBDhiIgDloIgGhi across
# timeCategory, rows where Prior.COVID.infection. is "Yes".
subsetData %>%
  subset(Prior.COVID.infection. == "Yes") %>%
  bartlett.test(Btet_RBDhiIgDloIgGhi ~ timeCategory, data = .)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Btet_RBDhiIgDloIgGhi by timeCategory
## Bartlett's K-squared = 2.741, df = 4, p-value = 0.6021
# Kruskal-Wallis test of Btet_RBDhiIgDloIgGhi across timeCategory, rows where
# Prior.COVID.infection. is "Yes".
subsetData %>%
  subset(Prior.COVID.infection. == "Yes") %>%
  kruskal_test(Btet_RBDhiIgDloIgGhi ~ timeCategory)
## # A tibble: 1 x 6
##   .y.                      n statistic    df     p method        
## * <chr>                <int>     <dbl> <int> <dbl> <chr>         
## 1 Btet_RBDhiIgDloIgGhi    74      6.83     4 0.145 Kruskal-Wallis
# Pairwise Dunn comparisons of Btet_RBDhiIgDloIgGhi between time categories,
# rows where Prior.COVID.infection. is "Yes".
subsetData %>%
  subset(Prior.COVID.infection. == "Yes") %>%
  dunn_test(Btet_RBDhiIgDloIgGhi ~ timeCategory)
## # A tibble: 10 x 9
##    .y.       group1   group2        n1    n2 statistic      p p.adj p.adj.signif
##  * <chr>     <chr>    <chr>      <int> <int>     <dbl>  <dbl> <dbl> <chr>       
##  1 Btet_RBD~ Baseline "Post 1st~     9     9     0.422 0.673  1     ns          
##  2 Btet_RBD~ Baseline "Pre 2nd ~     9     8     1.19  0.236  1     ns          
##  3 Btet_RBD~ Baseline "Post 2nd~     9     9     1.96  0.0504 0.454 ns          
##  4 Btet_RBD~ Baseline "One mont~     9     9     2.10  0.0354 0.354 ns          
##  5 Btet_RBD~ Post 1s~ "Pre 2nd ~     9     8     0.775 0.438  1     ns          
##  6 Btet_RBD~ Post 1s~ "Post 2nd~     9     9     1.53  0.125  0.875 ns          
##  7 Btet_RBD~ Post 1s~ "One mont~     9     9     1.68  0.0928 0.742 ns          
##  8 Btet_RBD~ Pre 2nd~ "Post 2nd~     8     9     0.713 0.476  1     ns          
##  9 Btet_RBD~ Pre 2nd~ "One mont~     8     9     0.855 0.392  1     ns          
## 10 Btet_RBD~ Post 2n~ "One mont~     9     9     0.147 0.883  1     ns
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Btet_RBDhiIgDloIgGhi")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig5D.csv")



# Btet_RBDhiIgDloIgGhi over time categories with recentCOVID enabled, y-axis
# fixed to 0-125 with breaks every 25.
# NOTE(review): the meaning of repMeasures, exponential, newform and
# recentCOVID is set inside prePostTime() -- confirm in the plotting helpers.
prePostTime(
  subsetData,
  xData = "timeCategory",
  yData = "Btet_RBDhiIgDloIgGhi",
  fillParam = "Prior.COVID.infection.",
  groupby = "Record.ID",
  title = "Switched RBD+ B cells",
  xLabel = " ",
  yLabel = "IgG+ IgD- (% RBD+)",
  repMeasures = FALSE,
  exponential = FALSE,
  newform = TRUE,
  recentCOVID = TRUE
) +
  scale_y_continuous(breaks = seq(0, 100, 25), limits = c(0, 125))
## [1] "block3.1 recentCOVID"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)
## Warning: Removed 77 row(s) containing missing values (geom_path).

## Warning: Removed 77 rows containing missing values (geom_point).

# ggsave(filename = "./Images/Btet_IgGhi_overTime_recentCOVID.pdf")


# Btet_RBDhi_DN2_FreqCD19 over time categories on a log10 y-axis limited to
# 0.001-3 (zero values become infinite under log10, hence the warnings).
# NOTE(review): the meaning of repMeasures, exponential and newform is set
# inside prePostTime() -- confirm in the plotting helpers.
prePostTime(
  subsetData,
  xData = "timeCategory",
  yData = "Btet_RBDhi_DN2_FreqCD19",
  fillParam = "Prior.COVID.infection.",
  groupby = "Record.ID",
  title = "DN2 RBD+ B cells",
  xLabel = " ",
  yLabel = "DN2 B cells (% CD19+)",
  repMeasures = FALSE,
  exponential = FALSE,
  newform = TRUE
) +
  scale_y_continuous(limits = c(0.001, 3), trans = "log10")
## [1] "block3"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 77 row(s) containing missing values (geom_path).
## Warning: Removed 77 rows containing missing values (geom_point).

# ggsave(filename = "./Images/Btet_DN2_overTime_fullPlot.pdf")

# Bartlett test of homogeneity of variances for Btet_RBDhi_DN2_FreqCD19
# across timeCategory, rows where Prior.COVID.infection. is "No".
subsetData %>%
  subset(Prior.COVID.infection. == "No") %>%
  bartlett.test(Btet_RBDhi_DN2_FreqCD19 ~ timeCategory, data = .)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Btet_RBDhi_DN2_FreqCD19 by timeCategory
## Bartlett's K-squared = Inf, df = 4, p-value < 2.2e-16
# Kruskal-Wallis test of Btet_RBDhi_DN2_FreqCD19 across timeCategory, rows
# where Prior.COVID.infection. is "No".
subsetData %>%
  subset(Prior.COVID.infection. == "No") %>%
  kruskal_test(Btet_RBDhi_DN2_FreqCD19 ~ timeCategory)
## # A tibble: 1 x 6
##   .y.                         n statistic    df        p method        
## * <chr>                   <int>     <dbl> <int>    <dbl> <chr>         
## 1 Btet_RBDhi_DN2_FreqCD19   102      21.7     4 0.000233 Kruskal-Wallis
# Pairwise Dunn comparisons of Btet_RBDhi_DN2_FreqCD19 between time
# categories, rows where Prior.COVID.infection. is "No".
subsetData %>%
  subset(Prior.COVID.infection. == "No") %>%
  dunn_test(Btet_RBDhi_DN2_FreqCD19 ~ timeCategory)
## # A tibble: 10 x 9
##    .y.      group1  group2       n1    n2 statistic       p   p.adj p.adj.signif
##  * <chr>    <chr>   <chr>     <int> <int>     <dbl>   <dbl>   <dbl> <chr>       
##  1 Btet_RB~ Baseli~ "Post 1s~    11    11    -0.431 6.67e-1 1       ns          
##  2 Btet_RB~ Baseli~ "Pre 2nd~    11    11     0.268 7.89e-1 1       ns          
##  3 Btet_RB~ Baseli~ "Post 2n~    11    11     3.43  6.04e-4 0.00544 **          
##  4 Btet_RB~ Baseli~ "One mon~    11    11     2.18  2.94e-2 0.176   ns          
##  5 Btet_RB~ Post 1~ "Pre 2nd~    11    11     0.699 4.85e-1 1       ns          
##  6 Btet_RB~ Post 1~ "Post 2n~    11    11     3.86  1.13e-4 0.00113 **          
##  7 Btet_RB~ Post 1~ "One mon~    11    11     2.61  9.08e-3 0.0636  ns          
##  8 Btet_RB~ Pre 2n~ "Post 2n~    11    11     3.16  1.57e-3 0.0126  *           
##  9 Btet_RB~ Pre 2n~ "One mon~    11    11     1.91  5.61e-2 0.281   ns          
## 10 Btet_RB~ Post 2~ "One mon~    11    11    -1.25  2.11e-1 0.843   ns
# Bartlett test of homogeneity of variances for Btet_RBDhi_DN2_FreqCD19
# across timeCategory, rows where Prior.COVID.infection. is "Yes".
subsetData %>%
  subset(Prior.COVID.infection. == "Yes") %>%
  bartlett.test(Btet_RBDhi_DN2_FreqCD19 ~ timeCategory, data = .)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Btet_RBDhi_DN2_FreqCD19 by timeCategory
## Bartlett's K-squared = 83.687, df = 4, p-value < 2.2e-16
# Kruskal-Wallis test of Btet_RBDhi_DN2_FreqCD19 across timeCategory, rows
# where Prior.COVID.infection. is "Yes".
subsetData %>%
  subset(Prior.COVID.infection. == "Yes") %>%
  kruskal_test(Btet_RBDhi_DN2_FreqCD19 ~ timeCategory)
## # A tibble: 1 x 6
##   .y.                         n statistic    df       p method        
## * <chr>                   <int>     <dbl> <int>   <dbl> <chr>         
## 1 Btet_RBDhi_DN2_FreqCD19    74      13.4     4 0.00928 Kruskal-Wallis
# Pairwise Dunn comparisons of Btet_RBDhi_DN2_FreqCD19 between time
# categories, rows where Prior.COVID.infection. is "Yes".
subsetData %>%
  subset(Prior.COVID.infection. == "Yes") %>%
  dunn_test(Btet_RBDhi_DN2_FreqCD19 ~ timeCategory)
## # A tibble: 10 x 9
##    .y.       group1  group2       n1    n2 statistic       p  p.adj p.adj.signif
##  * <chr>     <chr>   <chr>     <int> <int>     <dbl>   <dbl>  <dbl> <chr>       
##  1 Btet_RBD~ Baseli~ "Post 1s~     9     9    0.789  0.430   1      ns          
##  2 Btet_RBD~ Baseli~ "Pre 2nd~     9     8    2.23   0.0260  0.208  ns          
##  3 Btet_RBD~ Baseli~ "Post 2n~     9     9    2.81   0.00489 0.0441 *           
##  4 Btet_RBD~ Baseli~ "One mon~     9     9    2.91   0.00363 0.0363 *           
##  5 Btet_RBD~ Post 1~ "Pre 2nd~     9     8    1.46   0.144   0.721  ns          
##  6 Btet_RBD~ Post 1~ "Post 2n~     9     9    2.02   0.0429  0.257  ns          
##  7 Btet_RBD~ Post 1~ "One mon~     9     9    2.12   0.0340  0.238  ns          
##  8 Btet_RBD~ Pre 2n~ "Post 2n~     8     9    0.504  0.614   1      ns          
##  9 Btet_RBD~ Pre 2n~ "One mon~     8     9    0.596  0.551   1      ns          
## 10 Btet_RBD~ Post 2~ "One mon~     9     9    0.0951 0.924   1      ns
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Btet_RBDhi_DN2_FreqCD19")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs5I.csv")



# Btet_RBD_CD71._FreqParent over time categories on a linear y-axis limited
# to 0-100 with breaks every 20.
# NOTE(review): the meaning of repMeasures, exponential and newform is set
# inside prePostTime() -- confirm in the plotting helpers.
prePostTime(
  subsetData,
  xData = "timeCategory",
  yData = "Btet_RBD_CD71._FreqParent",
  fillParam = "Prior.COVID.infection.",
  groupby = "Record.ID",
  title = "CD71+ RBD+ B cells",
  xLabel = " ",
  yLabel = "CD71+ IgDlo (% RBD+)",
  repMeasures = FALSE,
  exponential = FALSE,
  newform = TRUE
) +
  scale_y_continuous(
    breaks = seq(0, 100, 20),
    trans = "identity",
    limits = c(0, 100)
  )
## [1] "block3"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)
## Warning: Removed 77 row(s) containing missing values (geom_path).

## Warning: Removed 77 rows containing missing values (geom_point).

# ggsave(filename = "./Images/Btet_CD71_overTime.pdf")
# Bartlett test of homogeneity of variances for Btet_RBD_CD71._FreqParent
# across timeCategory, rows where Prior.COVID.infection. is "No".
subsetData %>%
  subset(Prior.COVID.infection. == "No") %>%
  bartlett.test(Btet_RBD_CD71._FreqParent ~ timeCategory, data = .)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Btet_RBD_CD71._FreqParent by timeCategory
## Bartlett's K-squared = 32.531, df = 4, p-value = 1.49e-06
# Kruskal-Wallis test of Btet_RBD_CD71._FreqParent across timeCategory, rows
# where Prior.COVID.infection. is "No".
subsetData %>%
  subset(Prior.COVID.infection. == "No") %>%
  kruskal_test(Btet_RBD_CD71._FreqParent ~ timeCategory)
## # A tibble: 1 x 6
##   .y.                           n statistic    df      p method        
## * <chr>                     <int>     <dbl> <int>  <dbl> <chr>         
## 1 Btet_RBD_CD71._FreqParent   102      12.1     4 0.0166 Kruskal-Wallis
# Pairwise Dunn comparisons of Btet_RBD_CD71._FreqParent between time
# categories, rows where Prior.COVID.infection. is "No".
subsetData %>%
  subset(Prior.COVID.infection. == "No") %>%
  dunn_test(Btet_RBD_CD71._FreqParent ~ timeCategory)
## # A tibble: 10 x 9
##    .y.      group1   group2      n1    n2 statistic       p   p.adj p.adj.signif
##  * <chr>    <chr>    <chr>    <int> <int>     <dbl>   <dbl>   <dbl> <chr>       
##  1 Btet_RB~ Baseline "Post 1~    11    11    0.927  3.54e-1 1       ns          
##  2 Btet_RB~ Baseline "Pre 2n~    11    11    0.996  3.19e-1 1       ns          
##  3 Btet_RB~ Baseline "Post 2~    11    11    3.32   8.98e-4 0.00898 **          
##  4 Btet_RB~ Baseline "One mo~    11    11    1.55   1.21e-1 0.724   ns          
##  5 Btet_RB~ Post 1s~ "Pre 2n~    11    11    0.0695 9.45e-1 1       ns          
##  6 Btet_RB~ Post 1s~ "Post 2~    11    11    2.39   1.67e-2 0.150   ns          
##  7 Btet_RB~ Post 1s~ "One mo~    11    11    0.626  5.32e-1 1       ns          
##  8 Btet_RB~ Pre 2nd~ "Post 2~    11    11    2.32   2.01e-2 0.161   ns          
##  9 Btet_RB~ Pre 2nd~ "One mo~    11    11    0.556  5.78e-1 1       ns          
## 10 Btet_RB~ Post 2n~ "One mo~    11    11   -1.77   7.70e-2 0.539   ns
# Bartlett test of homogeneity of variances for Btet_RBD_CD71._FreqParent
# across timeCategory, rows where Prior.COVID.infection. is "Yes".
subsetData %>%
  subset(Prior.COVID.infection. == "Yes") %>%
  bartlett.test(Btet_RBD_CD71._FreqParent ~ timeCategory, data = .)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Btet_RBD_CD71._FreqParent by timeCategory
## Bartlett's K-squared = 42.502, df = 4, p-value = 1.313e-08
# Kruskal-Wallis test of Btet_RBD_CD71._FreqParent across timeCategory, rows
# where Prior.COVID.infection. is "Yes".
subsetData %>%
  subset(Prior.COVID.infection. == "Yes") %>%
  kruskal_test(Btet_RBD_CD71._FreqParent ~ timeCategory)
## # A tibble: 1 x 6
##   .y.                           n statistic    df       p method        
## * <chr>                     <int>     <dbl> <int>   <dbl> <chr>         
## 1 Btet_RBD_CD71._FreqParent    74      14.0     4 0.00724 Kruskal-Wallis
# Pairwise Dunn comparisons of Btet_RBD_CD71._FreqParent between time
# categories, rows where Prior.COVID.infection. is "Yes".
subsetData %>%
  subset(Prior.COVID.infection. == "Yes") %>%
  dunn_test(Btet_RBD_CD71._FreqParent ~ timeCategory)
## # A tibble: 10 x 9
##    .y.       group1  group2       n1    n2 statistic       p  p.adj p.adj.signif
##  * <chr>     <chr>   <chr>     <int> <int>     <dbl>   <dbl>  <dbl> <chr>       
##  1 Btet_RBD~ Baseli~ "Post 1s~     9     9     3.07  0.00215 0.0215 *           
##  2 Btet_RBD~ Baseli~ "Pre 2nd~     9     8     2.55  0.0107  0.0963 ns          
##  3 Btet_RBD~ Baseli~ "Post 2n~     9     9     2.27  0.0234  0.164  ns          
##  4 Btet_RBD~ Baseli~ "One mon~     9     9     0.625 0.532   1      ns          
##  5 Btet_RBD~ Post 1~ "Pre 2nd~     9     8    -0.424 0.672   1      ns          
##  6 Btet_RBD~ Post 1~ "Post 2n~     9     9    -0.802 0.423   1      ns          
##  7 Btet_RBD~ Post 1~ "One mon~     9     9    -2.44  0.0146  0.116  ns          
##  8 Btet_RBD~ Pre 2n~ "Post 2n~     8     9    -0.354 0.723   1      ns          
##  9 Btet_RBD~ Pre 2n~ "One mon~     8     9    -1.95  0.0516  0.310  ns          
## 10 Btet_RBD~ Post 2~ "One mon~     9     9    -1.64  0.101   0.504  ns
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Btet_RBD_CD71._FreqParent")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig5F.csv")


# Btet_RBD_CD21lo_FreqParent over time categories on a pseudo-log y-axis
# limited to 0-100.
# NOTE(review): the meaning of repMeasures, exponential and newform is set
# inside prePostTime() -- confirm in the plotting helpers.
prePostTime(
  subsetData,
  xData = "timeCategory",
  yData = "Btet_RBD_CD21lo_FreqParent",
  fillParam = "Prior.COVID.infection.",
  groupby = "Record.ID",
  title = "CD21lo RBD+ B cells",
  xLabel = " ",
  yLabel = "CD21lo (% RBD+)",
  repMeasures = FALSE,
  exponential = FALSE,
  newform = TRUE
) +
  scale_y_continuous(
    trans = "pseudo_log",
    limits = c(0, 100),
    breaks = c(0, 1, 10, 25, 50, 100)
  )
## [1] "block3"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)
## Warning: Removed 77 row(s) containing missing values (geom_path).

## Warning: Removed 77 rows containing missing values (geom_point).

# scale_y_continuous(breaks=seq(0,100,10),  trans = "identity", limits = c(0,60))
# ggsave(filename = "./Images/Btet_CD21lo_overTime.pdf")
# Bartlett test of homogeneity of variances for Btet_RBD_CD21lo_FreqParent
# across timeCategory, rows where Prior.COVID.infection. is "No".
subsetData %>%
  subset(Prior.COVID.infection. == "No") %>%
  bartlett.test(Btet_RBD_CD21lo_FreqParent ~ timeCategory, data = .)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Btet_RBD_CD21lo_FreqParent by timeCategory
## Bartlett's K-squared = 28.345, df = 4, p-value = 1.062e-05
# Kruskal-Wallis test of Btet_RBD_CD21lo_FreqParent across timeCategory, rows
# where Prior.COVID.infection. is "No".
subsetData %>%
  subset(Prior.COVID.infection. == "No") %>%
  kruskal_test(Btet_RBD_CD21lo_FreqParent ~ timeCategory)
## # A tibble: 1 x 6
##   .y.                            n statistic    df      p method        
## * <chr>                      <int>     <dbl> <int>  <dbl> <chr>         
## 1 Btet_RBD_CD21lo_FreqParent   102      8.51     4 0.0745 Kruskal-Wallis
# Pairwise Dunn comparisons of Btet_RBD_CD21lo_FreqParent between time
# categories, rows where Prior.COVID.infection. is "No".
subsetData %>%
  subset(Prior.COVID.infection. == "No") %>%
  dunn_test(Btet_RBD_CD21lo_FreqParent ~ timeCategory)
## # A tibble: 10 x 9
##    .y.        group1  group2        n1    n2 statistic      p p.adj p.adj.signif
##  * <chr>      <chr>   <chr>      <int> <int>     <dbl>  <dbl> <dbl> <chr>       
##  1 Btet_RBD_~ Baseli~ "Post 1st~    11    11   -0.0921 0.927  1     ns          
##  2 Btet_RBD_~ Baseli~ "Pre 2nd ~    11    11    0.516  0.606  1     ns          
##  3 Btet_RBD_~ Baseli~ "Post 2nd~    11    11    2.44   0.0147 0.132 ns          
##  4 Btet_RBD_~ Baseli~ "One mont~    11    11    0.405  0.685  1     ns          
##  5 Btet_RBD_~ Post 1~ "Pre 2nd ~    11    11    0.608  0.543  1     ns          
##  6 Btet_RBD_~ Post 1~ "Post 2nd~    11    11    2.53   0.0113 0.113 ns          
##  7 Btet_RBD_~ Post 1~ "One mont~    11    11    0.497  0.619  1     ns          
##  8 Btet_RBD_~ Pre 2n~ "Post 2nd~    11    11    1.92   0.0543 0.380 ns          
##  9 Btet_RBD_~ Pre 2n~ "One mont~    11    11   -0.110  0.912  1     ns          
## 10 Btet_RBD_~ Post 2~ "One mont~    11    11   -2.04   0.0418 0.335 ns
# Bartlett test of homogeneity of variances for Btet_RBD_CD21lo_FreqParent
# across timeCategory, rows where Prior.COVID.infection. is "Yes".
subsetData %>%
  subset(Prior.COVID.infection. == "Yes") %>%
  bartlett.test(Btet_RBD_CD21lo_FreqParent ~ timeCategory, data = .)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Btet_RBD_CD21lo_FreqParent by timeCategory
## Bartlett's K-squared = 19.979, df = 4, p-value = 0.0005043
# Kruskal-Wallis test of Btet_RBD_CD21lo_FreqParent across timeCategory, rows
# where Prior.COVID.infection. is "Yes".
subsetData %>%
  subset(Prior.COVID.infection. == "Yes") %>%
  kruskal_test(Btet_RBD_CD21lo_FreqParent ~ timeCategory)
## # A tibble: 1 x 6
##   .y.                            n statistic    df     p method        
## * <chr>                      <int>     <dbl> <int> <dbl> <chr>         
## 1 Btet_RBD_CD21lo_FreqParent    74      2.64     4 0.619 Kruskal-Wallis
# Pairwise Dunn comparisons of Btet_RBD_CD21lo_FreqParent between time
# categories, rows where Prior.COVID.infection. is "Yes".
subsetData %>%
  subset(Prior.COVID.infection. == "Yes") %>%
  dunn_test(Btet_RBD_CD21lo_FreqParent ~ timeCategory)
## # A tibble: 10 x 9
##    .y.        group1   group2        n1    n2 statistic     p p.adj p.adj.signif
##  * <chr>      <chr>    <chr>      <int> <int>     <dbl> <dbl> <dbl> <chr>       
##  1 Btet_RBD_~ Baseline "Post 1st~     9     9   0.265   0.791     1 ns          
##  2 Btet_RBD_~ Baseline "Pre 2nd ~     9     8   0.803   0.422     1 ns          
##  3 Btet_RBD_~ Baseline "Post 2nd~     9     9   0.834   0.404     1 ns          
##  4 Btet_RBD_~ Baseline "One mont~     9     9  -0.539   0.590     1 ns          
##  5 Btet_RBD_~ Post 1s~ "Pre 2nd ~     9     8   0.546   0.585     1 ns          
##  6 Btet_RBD_~ Post 1s~ "Post 2nd~     9     9   0.569   0.569     1 ns          
##  7 Btet_RBD_~ Post 1s~ "One mont~     9     9  -0.804   0.421     1 ns          
##  8 Btet_RBD_~ Pre 2nd~ "Post 2nd~     8     9   0.00595 0.995     1 ns          
##  9 Btet_RBD_~ Pre 2nd~ "One mont~     8     9  -1.33    0.185     1 ns          
## 10 Btet_RBD_~ Post 2n~ "One mont~     9     9  -1.37    0.170     1 ns
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Btet_RBD_CD21lo_FreqParent")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs5G.csv")


# Btet_RBD_CD24lo_FreqParent over time categories on a linear y-axis limited
# to 0-120 with breaks every 20 up to 100.
# NOTE(review): the meaning of repMeasures, exponential and newform is set
# inside prePostTime() -- confirm in the plotting helpers.
prePostTime(
  subsetData,
  xData = "timeCategory",
  yData = "Btet_RBD_CD24lo_FreqParent",
  fillParam = "Prior.COVID.infection.",
  groupby = "Record.ID",
  title = "CD24lo RBD+ B cells",
  xLabel = " ",
  yLabel = "CD24lo (% RBD+)",
  repMeasures = FALSE,
  exponential = FALSE,
  newform = TRUE
) +
  scale_y_continuous(
    breaks = seq(0, 100, 20),
    trans = "identity",
    limits = c(0, 120)
  )
## [1] "block3"
## `summarise()` regrouping output by 'timeCategory' (override with `.groups` argument)
## Warning: Removed 77 row(s) containing missing values (geom_path).

## Warning: Removed 77 rows containing missing values (geom_point).

# ggsave(filename = "./Images/Btet_CD24lo_overTime.pdf")
# Bartlett test of homogeneity of variances for Btet_RBD_CD24lo_FreqParent
# across timeCategory, rows where Prior.COVID.infection. is "No".
subsetData %>%
  subset(Prior.COVID.infection. == "No") %>%
  bartlett.test(Btet_RBD_CD24lo_FreqParent ~ timeCategory, data = .)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Btet_RBD_CD24lo_FreqParent by timeCategory
## Bartlett's K-squared = 12.128, df = 4, p-value = 0.01643
# Kruskal-Wallis test of Btet_RBD_CD24lo_FreqParent across timeCategory, rows
# where Prior.COVID.infection. is "No".
subsetData %>%
  subset(Prior.COVID.infection. == "No") %>%
  kruskal_test(Btet_RBD_CD24lo_FreqParent ~ timeCategory)
## # A tibble: 1 x 6
##   .y.                            n statistic    df        p method        
## * <chr>                      <int>     <dbl> <int>    <dbl> <chr>         
## 1 Btet_RBD_CD24lo_FreqParent   102      22.0     4 0.000201 Kruskal-Wallis
# Pairwise Dunn comparisons of Btet_RBD_CD24lo_FreqParent between time
# categories, rows where Prior.COVID.infection. is "No".
subsetData %>%
  subset(Prior.COVID.infection. == "No") %>%
  dunn_test(Btet_RBD_CD24lo_FreqParent ~ timeCategory)
## # A tibble: 10 x 9
##    .y.      group1  group2       n1    n2 statistic       p   p.adj p.adj.signif
##  * <chr>    <chr>   <chr>     <int> <int>     <dbl>   <dbl>   <dbl> <chr>       
##  1 Btet_RB~ Baseli~ "Post 1s~    11    11    1.05   2.92e-1 1.00e+0 ns          
##  2 Btet_RB~ Baseli~ "Pre 2nd~    11    11    2.05   3.99e-2 2.10e-1 ns          
##  3 Btet_RB~ Baseli~ "Post 2n~    11    11    4.47   7.76e-6 7.76e-5 ****        
##  4 Btet_RB~ Baseli~ "One mon~    11    11    2.11   3.49e-2 2.10e-1 ns          
##  5 Btet_RB~ Post 1~ "Pre 2nd~    11    11    1.00   3.17e-1 1.00e+0 ns          
##  6 Btet_RB~ Post 1~ "Post 2n~    11    11    3.42   6.33e-4 5.69e-3 **          
##  7 Btet_RB~ Post 1~ "One mon~    11    11    1.05   2.92e-1 1.00e+0 ns          
##  8 Btet_RB~ Pre 2n~ "Post 2n~    11    11    2.42   1.56e-2 1.25e-1 ns          
##  9 Btet_RB~ Pre 2n~ "One mon~    11    11    0.0548 9.56e-1 1.00e+0 ns          
## 10 Btet_RB~ Post 2~ "One mon~    11    11   -2.36   1.81e-2 1.27e-1 ns
# Bartlett test of homogeneity of variances for Btet_RBD_CD24lo_FreqParent
# across timeCategory, rows where Prior.COVID.infection. is "Yes".
subsetData %>%
  subset(Prior.COVID.infection. == "Yes") %>%
  bartlett.test(Btet_RBD_CD24lo_FreqParent ~ timeCategory, data = .)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Btet_RBD_CD24lo_FreqParent by timeCategory
## Bartlett's K-squared = 4.7743, df = 4, p-value = 0.3113
# Kruskal-Wallis omnibus test of Btet_RBD_CD24lo_FreqParent across
# timeCategory, rows where Prior.COVID.infection. is "Yes". This was the only
# marker/cohort combination in this analysis where the omnibus test was not
# run ahead of the Dunn post-hoc comparisons.
kruskal_test(
  formula = Btet_RBD_CD24lo_FreqParent ~ timeCategory,
  data = subset(subsetData, Prior.COVID.infection. == "Yes")
)
# Pairwise Dunn comparisons between time categories for the same rows.
dunn_test(
  Btet_RBD_CD24lo_FreqParent ~ timeCategory,
  data = subset(subsetData, Prior.COVID.infection. == "Yes")
)
## # A tibble: 10 x 9
##    .y.       group1  group2       n1    n2 statistic       p  p.adj p.adj.signif
##  * <chr>     <chr>   <chr>     <int> <int>     <dbl>   <dbl>  <dbl> <chr>       
##  1 Btet_RBD~ Baseli~ "Post 1s~     9     9     2.32  0.0201  0.181  ns          
##  2 Btet_RBD~ Baseli~ "Pre 2nd~     9     8     1.94  0.0520  0.364  ns          
##  3 Btet_RBD~ Baseli~ "Post 2n~     9     9     3.12  0.00179 0.0179 *           
##  4 Btet_RBD~ Baseli~ "One mon~     9     9     2.16  0.0308  0.247  ns          
##  5 Btet_RBD~ Post 1~ "Pre 2nd~     9     8    -0.312 0.755   1      ns          
##  6 Btet_RBD~ Post 1~ "Post 2n~     9     9     0.799 0.424   1      ns          
##  7 Btet_RBD~ Post 1~ "One mon~     9     9    -0.165 0.869   1      ns          
##  8 Btet_RBD~ Pre 2n~ "Post 2n~     8     9     1.09  0.277   1      ns          
##  9 Btet_RBD~ Pre 2n~ "One mon~     8     9     0.152 0.880   1      ns          
## 10 Btet_RBD~ Post 2~ "One mon~     9     9    -0.965 0.335   1      ns
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Btet_RBD_CD24lo_FreqParent")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs5F.csv")

# ------ Antibody analyses ------

# binding_IgG_S1 against DPV (x-axis labelled "Days relative to first dose"),
# one line per Record.ID, coloured by Prior.COVID.infection., on a pseudo-log
# y-axis with power-of-ten breaks.
linePlot(
  data = mergedData,
  xData = "DPV",
  yData = "binding_IgG_S1",
  groupby = "Record.ID",
  xLabel = "Days relative to first dose",
  yLabel = "anti-S1 IgG titer",
  title = "anti-S1 IgG titer",
  colorby = "Prior.COVID.infection."
) +
  # scale_color_manual(name="Prior COVID?",values = c("#FFC26A","#B5B2F1")) +
  scale_y_continuous(
    trans = "pseudo_log",
    limits = c(0, 1e7),
    breaks = 10^(0:7),
    labels = trans_format("log10", math_format(10^.x)),
    minor_breaks = 5 * 10^(0:10)
  )

# ggsave(filename = "./Images/BindingAb_S1_IgG_linePlot_contTime.pdf", width=7)


# Remove the "two Weeks" and "2 wks post 2nd dose" time categories.
# A logical mask is used instead of `df[-which(cond), ]`: when no row matches,
# which() returns integer(0) and the negative-index form would silently return
# zero rows. %in% never yields NA, so rows with a missing timeCategory are
# kept, exactly as the which()-based form kept them.
subsetData <- mergedData[
  !(mergedData$timeCategory %in% c("two Weeks", "2 wks post 2nd dose")),
]

# Fix the display order of the remaining time categories.
subsetData$timeCategory <- factor(
  subsetData$timeCategory,
  levels = c(
    "Baseline", "Post 1st dose", "Pre 2nd dose", "Post 2nd dose",
    "One month post\n2nd dose", "Four months post\n2nd dose"
  )
)

# binding_IgG_S1 per time category, one line per Record.ID, with a dashed
# reference line at 25 labelled "LOD", on a pseudo-log y-axis.
a <- linePlot(
  data = subsetData,
  xData = "timeCategory",
  yData = "binding_IgG_S1",
  groupby = "Record.ID",
  xLabel = " ",
  yLabel = "anti-S1 IgG titer",
  title = "anti-S1 IgG titer",
  colorby = "Prior.COVID.infection."
) +
  geom_hline(yintercept = 25, linetype = "dashed", alpha = 0.3) +
  annotate("text", x = 5, y = 15, label = "LOD", color = "black", alpha = 0.2) +
  scale_y_continuous(
    trans = "pseudo_log",
    limits = c(0, 1e8),
    breaks = 10^(0:8),
    labels = trans_format("log10", math_format(10^.x)),
    minor_breaks = 5 * 10^(0:10)
  )
a

# ggsave(filename = "./Images/BindingAb_S1_IgG_linePlot.pdf")
# plotly::ggplotly(a)

# Bartlett test of homogeneity of variances for binding_IgG_S1 across
# timeCategory, rows where Prior.COVID.infection. is "No".
subsetData %>%
  subset(Prior.COVID.infection. == "No") %>%
  bartlett.test(binding_IgG_S1 ~ timeCategory, data = .)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  binding_IgG_S1 by timeCategory
## Bartlett's K-squared = 609.05, df = 4, p-value < 2.2e-16
# Kruskal-Wallis test of binding_IgG_S1 across timeCategory, rows where
# Prior.COVID.infection. is "No".
subsetData %>%
  subset(Prior.COVID.infection. == "No") %>%
  kruskal_test(binding_IgG_S1 ~ timeCategory)
## # A tibble: 1 x 6
##   .y.                n statistic    df        p method        
## * <chr>          <int>     <dbl> <int>    <dbl> <chr>         
## 1 binding_IgG_S1   102      75.7     4 1.38e-15 Kruskal-Wallis
# Pairwise Dunn comparisons of binding_IgG_S1 between time categories, rows
# where Prior.COVID.infection. is "No".
subsetData %>%
  subset(Prior.COVID.infection. == "No") %>%
  dunn_test(binding_IgG_S1 ~ timeCategory)
## # A tibble: 10 x 9
##    .y.     group1  group2      n1    n2 statistic        p    p.adj p.adj.signif
##  * <chr>   <chr>   <chr>    <int> <int>     <dbl>    <dbl>    <dbl> <chr>       
##  1 bindin~ Baseli~ "Post 1~    21    17     1.81  7.01e- 2 1.40e- 1 ns          
##  2 bindin~ Baseli~ "Pre 2n~    21    17     4.53  5.90e- 6 3.54e- 5 ****        
##  3 bindin~ Baseli~ "Post 2~    21    17     7.09  1.30e-12 1.30e-11 ****        
##  4 bindin~ Baseli~ "One mo~    21    17     6.75  1.48e-11 1.33e-10 ****        
##  5 bindin~ Post 1~ "Pre 2n~    17    17     2.59  9.71e- 3 4.86e- 2 *           
##  6 bindin~ Post 1~ "Post 2~    17    17     5.02  5.04e- 7 4.03e- 6 ****        
##  7 bindin~ Post 1~ "One mo~    17    17     4.70  2.63e- 6 1.84e- 5 ****        
##  8 bindin~ Pre 2n~ "Post 2~    17    17     2.44  1.47e- 2 5.89e- 2 ns          
##  9 bindin~ Pre 2n~ "One mo~    17    17     2.11  3.47e- 2 1.04e- 1 ns          
## 10 bindin~ Post 2~ "One mo~    17    17    -0.327 7.43e- 1 7.43e- 1 ns
# Kruskal-Wallis test of binding_IgG_S1 across timeCategory, rows where
# Prior.COVID.infection. is "Yes".
subsetData %>%
  subset(Prior.COVID.infection. == "Yes") %>%
  kruskal_test(binding_IgG_S1 ~ timeCategory)
## # A tibble: 1 x 6
##   .y.                n statistic    df          p method        
## * <chr>          <int>     <dbl> <int>      <dbl> <chr>         
## 1 binding_IgG_S1    74      37.1     4 0.00000017 Kruskal-Wallis
# Bartlett test of homogeneity of variances for binding_IgG_S1 across
# timeCategory, rows where Prior.COVID.infection. is "Yes".
subsetData %>%
  subset(Prior.COVID.infection. == "Yes") %>%
  bartlett.test(binding_IgG_S1 ~ timeCategory, data = .)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  binding_IgG_S1 by timeCategory
## Bartlett's K-squared = 122.1, df = 4, p-value < 2.2e-16
# Pairwise Dunn comparisons of binding_IgG_S1 between time categories, rows
# where Prior.COVID.infection. is "Yes".
subsetData %>%
  subset(Prior.COVID.infection. == "Yes") %>%
  dunn_test(binding_IgG_S1 ~ timeCategory)
## # A tibble: 10 x 9
##    .y.     group1   group2       n1    n2 statistic       p   p.adj p.adj.signif
##  * <chr>   <chr>    <chr>     <int> <int>     <dbl>   <dbl>   <dbl> <chr>       
##  1 bindin~ Baseline "Post 1s~    15    15     3.82  1.33e-4 9.33e-4 ***         
##  2 bindin~ Baseline "Pre 2nd~    15    14     3.92  8.79e-5 7.03e-4 ***         
##  3 bindin~ Baseline "Post 2n~    15    15     5.12  3.07e-7 2.76e-6 ****        
##  4 bindin~ Baseline "One mon~    15    15     5.37  8.08e-8 8.08e-7 ****        
##  5 bindin~ Post 1s~ "Pre 2nd~    15    14     0.168 8.67e-1 1.00e+0 ns          
##  6 bindin~ Post 1s~ "Post 2n~    15    15     1.30  1.94e-1 8.85e-1 ns          
##  7 bindin~ Post 1s~ "One mon~    15    15     1.55  1.22e-1 7.34e-1 ns          
##  8 bindin~ Pre 2nd~ "Post 2n~    14    15     1.11  2.68e-1 8.85e-1 ns          
##  9 bindin~ Pre 2nd~ "One mon~    14    15     1.35  1.77e-1 8.85e-1 ns          
## 10 bindin~ Post 2n~ "One mon~    15    15     0.246 8.06e-1 1.00e+0 ns
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "binding_IgG_S1")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig6A.csv")


# binding_IgG_S1 per time category with recentCOVID enabled, one line per
# Record.ID, with a dashed reference line at 25 labelled "LOD", on a
# pseudo-log y-axis.
# NOTE(review): the effect of `recentCOVID` is set inside linePlot() --
# confirm in the plotting helpers.
linePlot(
  data = subsetData,
  xData = "timeCategory",
  yData = "binding_IgG_S1",
  groupby = "Record.ID",
  xLabel = " ",
  yLabel = "anti-S1 IgG titer",
  title = "anti-S1 IgG titer",
  colorby = "Prior.COVID.infection.",
  recentCOVID = TRUE  # TRUE rather than T: T is a reassignable variable
) +
  geom_hline(yintercept = 25, linetype = "dashed", alpha = 0.3) +
  annotate("text", x = 5, y = 15, label = "LOD", color = "black", alpha = 0.2) +
  scale_y_continuous(
    trans = "pseudo_log",
    limits = c(0, 1e7),
    breaks = 10^(0:7),
    labels = trans_format("log10", math_format(10^.x)),
    minor_breaks = 5 * 10^(0:10)
  )

# ggsave(filename = "./Images/BindingAb_S1_IgG_linePlot_recentCOVID.pdf")

# "Post 2nd dose" samples only: plot binding_IgG_S1 split by prior COVID
# infection status on a pseudo-log y-axis, with the view limited to 0-1e7.
# NOTE(review): `nonparam` presumably selects a non-parametric comparison
# inside twoSampleBar() -- confirm in the plotting helpers.
subsetData <- subset(mergedData, timeCategory == "Post 2nd dose")
twoSampleBar(
  data = subsetData,
  xData = "Prior.COVID.infection.",
  yData = "binding_IgG_S1",
  fillParam = "Prior.COVID.infection.",
  title = "Post 2nd dose",
  yLabel = "anti-S1 IgG titer",
  nonparam = TRUE
) +
  # The stray empty trailing argument was dropped and 0e1 is written as 0.
  coord_cartesian(ylim = c(0, 1e7)) +
  scale_y_continuous(
    trans = "pseudo_log",
    breaks = 10^(0:7),
    labels = trans_format("log10", math_format(10^.x)),
    minor_breaks = 5 * 10^(0:10)
  )
## Warning: Removed 3 rows containing missing values (position_quasirandom).

# ggsave(filename = "./Images/BindingAb_S1_IgG_post2ndDose.pdf", width=5)

# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "binding_IgG_S1")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs6B.csv")

# Anti-S1 IgG titer at the "One month post\n2nd dose" time point, compared
# between the two Prior.COVID.infection. groups with twoSampleBar().
subsetData <- subset(mergedData, timeCategory == "One month post\n2nd dose")
twoSampleBar(
  data = subsetData, xData = "Prior.COVID.infection.",
  yData = "binding_IgG_S1", fillParam = "Prior.COVID.infection.",
  title = "One month post", yLabel = "anti-S1 IgG titer",
  nonparam = TRUE  # TRUE rather than T: T is an ordinary, reassignable name
) +
  # Restrict the displayed y-range to 0 .. 1e7.
  coord_cartesian(ylim = c(0, 1e7)) +
  scale_y_continuous(
    trans = "pseudo_log", breaks = 10^(0:7),
    labels = trans_format("log10", math_format(10^.x)),
    minor_breaks = 5 * 10^(0:10)
  )
## Warning: Removed 2 rows containing missing values (position_quasirandom).

# ggsave(filename = "./Images/BindingAb_S1_IgG_oneMonth.pdf", width=5)

# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "binding_IgG_S1")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs6B-2.csv")

# Group means with standard-error bars at the two post-2nd-dose time points,
# one point/line series per Prior.COVID.infection. group.
# NOTE(review): columns are picked by position (1, 2, 11, 27); this selects the
# wrong variables if mergedData's column order changes. Every numeric column
# kept here is summarised by get_summary_stats() - confirm only the intended
# measurement survives, and consider selecting by name.
subsetData <- subset(
  mergedData,
  timeCategory %in% c("Post 2nd dose", "One month post\n2nd dose")
)[, c(1, 2, 11, 27)]
temp <- get_summary_stats(
  group_by(subsetData, Prior.COVID.infection., timeCategory),
  type = "common"
)
ggplot(
  temp,
  aes(
    x = timeCategory, y = mean,
    group = Prior.COVID.infection., color = Prior.COVID.infection.
  )
) +
  geom_point(size = 5) +
  geom_line(size = 1, alpha = 0.5) +
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.2, size = 0.5) +
  theme_bw() +
  scale_color_manual(name = "Prior COVID?", values = c("#FFC26A", "#B5B2F1")) +
  ggtitle("anti-S1 IgG") +
  xlab("") +
  ylab("anti-S1 IgG titer") +
  theme(
    axis.text = element_text(color = "black", size = 16),
    axis.title = element_text(size = 20),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
    plot.title = element_text(size = 24),
    legend.position = "none"
  ) +
  scale_y_continuous(
    trans = "pseudo_log", limits = c(10^4, 10^7), breaks = 10^(0:7),
    labels = trans_format("log10", math_format(10^.x)),
    minor_breaks = 5 * 10^(0:10)
  )

# ggsave(filename = "./Images/BindingAb_S1_IgG_declines.pdf", width=2.5, height=6)

# Paired t-test of anti-S1 IgG between the two time points in subsetData for
# rows where Prior.COVID.infection. is "No".
# NOTE(review): with the formula interface the pairs are formed purely by row
# order within each time point (after rows with missing values are dropped),
# not by Record.ID - confirm both time points list the same participants in the
# same order. Newer R releases (4.4.0+ per the R NEWS; confirm) reject `paired`
# in the formula method, where t.test(Pair(x, y) ~ 1) is the replacement.
naive <- subset(subsetData, Prior.COVID.infection. == "No")
t.test(binding_IgG_S1 ~ timeCategory, data = naive, paired = TRUE)
## 
##  Paired t-test
## 
## data:  binding_IgG_S1 by timeCategory
## t = 1.2171, df = 16, p-value = 0.2412
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -69672.94 257535.94
## sample estimates:
## mean of the differences 
##                 93931.5
# Paired t-test of anti-S1 IgG between the two time points in subsetData for
# rows where Prior.COVID.infection. is "Yes".
# NOTE(review): with the formula interface the pairs are formed purely by row
# order within each time point (after rows with missing values are dropped),
# not by Record.ID - confirm both time points list the same participants in the
# same order. Newer R releases (4.4.0+ per the R NEWS; confirm) reject `paired`
# in the formula method, where t.test(Pair(x, y) ~ 1) is the replacement.
experienced <- subset(subsetData, Prior.COVID.infection. == "Yes")
t.test(binding_IgG_S1 ~ timeCategory, data = experienced, paired = TRUE)
## 
##  Paired t-test
## 
## data:  binding_IgG_S1 by timeCategory
## t = -0.50848, df = 14, p-value = 0.619
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -433790.4  267525.7
## sample estimates:
## mean of the differences 
##               -83132.36
# Per-participant anti-S1 IgA titer across the five ordered time points.
# Rows labelled "two Weeks" are removed with a logical mask: negative which()
# indexing would return zero rows if no "two Weeks" entries were present,
# whereas this keeps every row in that case. Rows with a missing timeCategory
# are retained, as before.
subsetData <- mergedData[!(mergedData$timeCategory %in% "two Weeks"), ]
# Fix the plotting order of the time points; any other label becomes NA.
subsetData$timeCategory <- factor(
  subsetData$timeCategory,
  levels = c(
    "Baseline", "Post 1st dose", "Pre 2nd dose", "Post 2nd dose",
    "One month post\n2nd dose"
  )
)
linePlot(
  data = subsetData, xData = "timeCategory", yData = "binding_IgA_S1",
  groupby = "Record.ID", xLabel = " ", yLabel = "anti-S1 IgA titer",
  title = "anti-S1 IgA titer", colorby = "Prior.COVID.infection."
) +
  # Dashed reference line at a titer of 25 with an "LOD" label just below it.
  geom_hline(yintercept = 25, linetype = "dashed", alpha = 0.3) +
  annotate("text", x = 5, y = 15, label = "LOD", color = "black", alpha = 0.2) +
  scale_y_continuous(
    trans = "pseudo_log", limits = c(0, 1e6), breaks = 10^(0:7),
    labels = trans_format("log10", math_format(10^.x)),
    minor_breaks = 5 * 10^(0:10)
  )

# ggsave(filename = "./Images/BindingAb_S1_IgA_linePlot.pdf")

# bartlett.test(binding_IgA_S1 ~ timeCategory, data=subset(subsetData, Prior.COVID.infection. == 'No'))
# Kruskal-Wallis test of anti-S1 IgA titer across time points, limited to rows
# where Prior.COVID.infection. is "No".
subsetData %>%
  subset(Prior.COVID.infection. == "No") %>%
  kruskal_test(binding_IgA_S1 ~ timeCategory)
## # A tibble: 1 x 6
##   .y.                n statistic    df          p method        
## * <chr>          <int>     <dbl> <int>      <dbl> <chr>         
## 1 binding_IgA_S1   103      30.8     4 0.00000334 Kruskal-Wallis
# Dunn pairwise comparisons of anti-S1 IgA titer between time points, limited to
# rows where Prior.COVID.infection. is "No".
subsetData %>%
  subset(Prior.COVID.infection. == "No") %>%
  dunn_test(binding_IgA_S1 ~ timeCategory)
## # A tibble: 10 x 9
##    .y.     group1   group2       n1    n2 statistic       p   p.adj p.adj.signif
##  * <chr>   <chr>    <chr>     <int> <int>     <dbl>   <dbl>   <dbl> <chr>       
##  1 bindin~ Baseline "Post 1s~     9    10     0.437 6.62e-1 1.00e+0 ns          
##  2 bindin~ Baseline "Pre 2nd~     9    10     3.86  1.11e-4 8.89e-4 ***         
##  3 bindin~ Baseline "Post 2n~     9     6     4.28  1.84e-5 1.84e-4 ***         
##  4 bindin~ Baseline "One mon~     9     1     0.952 3.41e-1 1.00e+0 ns          
##  5 bindin~ Post 1s~ "Pre 2nd~    10    10     3.52  4.28e-4 3.00e-3 **          
##  6 bindin~ Post 1s~ "Post 2n~    10     6     3.98  6.81e-5 6.13e-4 ***         
##  7 bindin~ Post 1s~ "One mon~    10     1     0.765 4.44e-1 1.00e+0 ns          
##  8 bindin~ Pre 2nd~ "Post 2n~    10     6     0.933 3.51e-1 1.00e+0 ns          
##  9 bindin~ Pre 2nd~ "One mon~    10     1    -0.737 4.61e-1 1.00e+0 ns          
## 10 bindin~ Post 2n~ "One mon~     6     1    -1.16  2.46e-1 1.00e+0 ns
# Bartlett test of equal anti-S1 IgA variance across time points, limited to
# rows where Prior.COVID.infection. is "Yes".
bartlett.test(
  binding_IgA_S1 ~ timeCategory,
  data = subset(subsetData, Prior.COVID.infection. == "Yes")
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  binding_IgA_S1 by timeCategory
## Bartlett's K-squared = 70.408, df = 4, p-value = 1.862e-14
# Kruskal-Wallis test of anti-S1 IgA titer across time points, limited to rows
# where Prior.COVID.infection. is "Yes".
subsetData %>%
  subset(Prior.COVID.infection. == "Yes") %>%
  kruskal_test(binding_IgA_S1 ~ timeCategory)
## # A tibble: 1 x 6
##   .y.                n statistic    df         p method        
## * <chr>          <int>     <dbl> <int>     <dbl> <chr>         
## 1 binding_IgA_S1    74      23.8     4 0.0000875 Kruskal-Wallis
# Dunn pairwise comparisons of anti-S1 IgA titer between time points, limited to
# rows where Prior.COVID.infection. is "Yes".
subsetData %>%
  subset(Prior.COVID.infection. == "Yes") %>%
  dunn_test(binding_IgA_S1 ~ timeCategory)
## # A tibble: 10 x 9
##    .y.     group1   group2       n1    n2 statistic       p   p.adj p.adj.signif
##  * <chr>   <chr>    <chr>     <int> <int>     <dbl>   <dbl>   <dbl> <chr>       
##  1 bindin~ Baseline "Post 1s~    12     9    3.32   8.93e-4 7.15e-3 **          
##  2 bindin~ Baseline "Pre 2nd~    12    10    3.77   1.65e-4 1.48e-3 **          
##  3 bindin~ Baseline "Post 2n~    12    10    4.23   2.36e-5 2.36e-4 ***         
##  4 bindin~ Baseline "One mon~    12     6    2.95   3.13e-3 2.19e-2 *           
##  5 bindin~ Post 1s~ "Pre 2nd~     9    10    0.323  7.47e-1 1.00e+0 ns          
##  6 bindin~ Post 1s~ "Post 2n~     9    10    0.751  4.52e-1 1.00e+0 ns          
##  7 bindin~ Post 1s~ "One mon~     9     6    0.0231 9.82e-1 1.00e+0 ns          
##  8 bindin~ Pre 2nd~ "Post 2n~    10    10    0.440  6.60e-1 1.00e+0 ns          
##  9 bindin~ Pre 2nd~ "One mon~    10     6   -0.264  7.92e-1 1.00e+0 ns          
## 10 bindin~ Post 2n~ "One mon~    10     6   -0.645  5.19e-1 1.00e+0 ns
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "binding_IgA_S1")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig6B.csv")



# Per-participant neutralizing-antibody values across time points.
# Rows labelled "two Weeks" are removed with a logical mask: negative which()
# indexing would return zero rows if no "two Weeks" entries were present,
# whereas this keeps every row in that case. Rows with a missing timeCategory
# are retained, as before.
subsetData <- mergedData[!(mergedData$timeCategory %in% "two Weeks"), ]
# subsetData$timeCategory <- factor(subsetData$timeCategory, levels = c("Baseline", "Post 1st dose", "Pre 2nd dose", "Post 2nd dose"))
# NOTE(review): the column name and axis label say log10, yet the axis applies a
# pseudo-log transform with limits up to 3e5 - confirm the values are not
# already log-transformed.
linePlot(
  data = subsetData, xData = "timeCategory", yData = "IC50_neutAb_log10",
  groupby = "Record.ID", xLabel = " ", yLabel = "log10 IC50",
  title = "Neutralizing antibodies", colorby = "Prior.COVID.infection."
) +
  # Dashed reference line at 10 with an "LOD" label next to it.
  geom_hline(yintercept = 10, linetype = "dashed", alpha = 0.3) +
  annotate("text", x = 5, y = 15, label = "LOD", color = "black", alpha = 0.2) +
  scale_y_continuous(
    trans = "pseudo_log", limits = c(0, 3e5), breaks = 10^(0:6),
    labels = trans_format("log10", math_format(10^.x)),
    minor_breaks = 5 * 10^(0:10)
  )

# ggsave(filename = "./Images/neutAb_linePlot.pdf", width=8)

# Bartlett test of equal IC50_neutAb_log10 variance across time points, limited
# to rows where Prior.COVID.infection. is "No".
bartlett.test(
  IC50_neutAb_log10 ~ timeCategory,
  data = subset(subsetData, Prior.COVID.infection. == "No")
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  IC50_neutAb_log10 by timeCategory
## Bartlett's K-squared = Inf, df = 4, p-value < 2.2e-16
# Kruskal-Wallis test of IC50_neutAb_log10 across time points, limited to rows
# where Prior.COVID.infection. is "No".
subsetData %>%
  subset(Prior.COVID.infection. == "No") %>%
  kruskal_test(IC50_neutAb_log10 ~ timeCategory)
## # A tibble: 1 x 6
##   .y.                   n statistic    df           p method        
## * <chr>             <int>     <dbl> <int>       <dbl> <chr>         
## 1 IC50_neutAb_log10   103      33.8     4 0.000000832 Kruskal-Wallis
# Dunn pairwise comparisons of IC50_neutAb_log10 between time points, limited to
# rows where Prior.COVID.infection. is "No".
subsetData %>%
  subset(Prior.COVID.infection. == "No") %>%
  dunn_test(IC50_neutAb_log10 ~ timeCategory)
## # A tibble: 10 x 9
##    .y.     group1   group2       n1    n2 statistic       p   p.adj p.adj.signif
##  * <chr>   <chr>    <chr>     <int> <int>     <dbl>   <dbl>   <dbl> <chr>       
##  1 IC50_n~ Baseline "Post 1s~     5    10     0.114 9.09e-1 1.00e+0 ns          
##  2 IC50_n~ Baseline "Pre 2nd~     5    10     2.32  2.03e-2 1.02e-1 ns          
##  3 IC50_n~ Baseline "Post 2n~     5     9     3.93  8.51e-5 6.81e-4 ***         
##  4 IC50_n~ Baseline "One mon~     5     8     3.46  5.40e-4 3.78e-3 **          
##  5 IC50_n~ Post 1s~ "Pre 2nd~    10    10     2.70  6.89e-3 4.13e-2 *           
##  6 IC50_n~ Post 1s~ "Post 2n~    10     9     4.63  3.57e-6 3.57e-5 ****        
##  7 IC50_n~ Post 1s~ "One mon~    10     8     4.03  5.64e-5 5.08e-4 ***         
##  8 IC50_n~ Pre 2nd~ "Post 2n~    10     9     2.00  4.50e-2 1.80e-1 ns          
##  9 IC50_n~ Pre 2nd~ "One mon~    10     8     1.48  1.39e-1 4.17e-1 ns          
## 10 IC50_n~ Post 2n~ "One mon~     9     8    -0.451 6.52e-1 1.00e+0 ns
# Bartlett test of equal IC50_neutAb_log10 variance across time points, limited
# to rows where Prior.COVID.infection. is "Yes".
bartlett.test(
  IC50_neutAb_log10 ~ timeCategory,
  data = subset(subsetData, Prior.COVID.infection. == "Yes")
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  IC50_neutAb_log10 by timeCategory
## Bartlett's K-squared = 52.053, df = 4, p-value = 1.345e-10
# Kruskal-Wallis test of IC50_neutAb_log10 across time points, limited to rows
# where Prior.COVID.infection. is "Yes".
subsetData %>%
  subset(Prior.COVID.infection. == "Yes") %>%
  kruskal_test(IC50_neutAb_log10 ~ timeCategory)
## # A tibble: 1 x 6
##   .y.                   n statistic    df        p method        
## * <chr>             <int>     <dbl> <int>    <dbl> <chr>         
## 1 IC50_neutAb_log10    74      21.1     4 0.000296 Kruskal-Wallis
# Dunn pairwise comparisons of IC50_neutAb_log10 between time points, limited to
# rows where Prior.COVID.infection. is "Yes".
subsetData %>%
  subset(Prior.COVID.infection. == "Yes") %>%
  dunn_test(IC50_neutAb_log10 ~ timeCategory)
## # A tibble: 10 x 9
##    .y.     group1   group2       n1    n2 statistic       p   p.adj p.adj.signif
##  * <chr>   <chr>    <chr>     <int> <int>     <dbl>   <dbl>   <dbl> <chr>       
##  1 IC50_n~ Baseline "Post 1s~    10    10    3.37   7.65e-4 6.88e-3 **          
##  2 IC50_n~ Baseline "Pre 2nd~    10     9    3.33   8.81e-4 6.88e-3 **          
##  3 IC50_n~ Baseline "Post 2n~    10     9    3.36   7.74e-4 6.88e-3 **          
##  4 IC50_n~ Baseline "One mon~    10     8    4.01   6.09e-5 6.09e-4 ***         
##  5 IC50_n~ Post 1s~ "Pre 2nd~    10     9    0.0504 9.60e-1 1.00e+0 ns          
##  6 IC50_n~ Post 1s~ "Post 2n~    10     9    0.0865 9.31e-1 1.00e+0 ns          
##  7 IC50_n~ Post 1s~ "One mon~    10     8    0.836  4.03e-1 1.00e+0 ns          
##  8 IC50_n~ Pre 2nd~ "Post 2n~     9     9    0.0351 9.72e-1 1.00e+0 ns          
##  9 IC50_n~ Pre 2nd~ "One mon~     9     8    0.769  4.42e-1 1.00e+0 ns          
## 10 IC50_n~ Post 2n~ "One mon~     9     8    0.735  4.63e-1 1.00e+0 ns
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "IC50_neutAb_log10")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig6D.csv")

# Per-participant neutralizing-antibody values across time points, drawn with
# linePlot()'s recentCOVID option switched on.
# Rows labelled "two Weeks" are removed with a logical mask: negative which()
# indexing would return zero rows if no "two Weeks" entries were present,
# whereas this keeps every row in that case. Rows with a missing timeCategory
# are retained, as before.
subsetData <- mergedData[!(mergedData$timeCategory %in% "two Weeks"), ]
linePlot(
  data = subsetData, xData = "timeCategory", yData = "IC50_neutAb_log10",
  groupby = "Record.ID", xLabel = " ", yLabel = "log10 IC50",
  title = "Neutralizing antibodies", colorby = "Prior.COVID.infection.",
  recentCOVID = TRUE  # TRUE rather than T: T is an ordinary, reassignable name
) +
  # Dashed reference line at 10 with an "LOD" label next to it.
  geom_hline(yintercept = 10, linetype = "dashed", alpha = 0.3) +
  annotate("text", x = 5, y = 15, label = "LOD", color = "black", alpha = 0.2) +
  scale_y_continuous(
    trans = "pseudo_log", limits = c(0, 1e5), breaks = 10^(0:6),
    labels = trans_format("log10", math_format(10^.x)),
    minor_breaks = 5 * 10^(0:10)
  )

# ggsave(filename = "./Images/neutAb_linePlot_recentCOVID.pdf")

# Neutralizing antibodies: Post 2nd dose

# Neutralizing-antibody values at the "Post 2nd dose" time point, compared
# between the two Prior.COVID.infection. groups with twoSampleBar().
subsetData <- subset(mergedData, timeCategory == "Post 2nd dose")
twoSampleBar(
  data = subsetData, xData = "Prior.COVID.infection.",
  yData = "IC50_neutAb_log10", fillParam = "Prior.COVID.infection.",
  title = "Post 2nd dose", yLabel = "Neutralizing antibodies",
  nonparam = TRUE  # TRUE rather than T: T is an ordinary, reassignable name
) +
  # Restrict the displayed y-range to 0 .. 1e5.
  coord_cartesian(ylim = c(0, 1e5)) +
  scale_y_continuous(
    trans = "pseudo_log", breaks = 10^(0:7),
    labels = trans_format("log10", math_format(10^.x)),
    minor_breaks = 5 * 10^(0:10)
  )
## Warning: Removed 17 rows containing missing values (position_quasirandom).

# ggsave(filename = "./Images/neutAb_post2ndDose.pdf", width=5)

# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "IC50_neutAb_log10")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs6G.csv")

# Neutralizing antibodies: One month post 2nd dose

# Neutralizing-antibody values at the "One month post\n2nd dose" time point,
# compared between the two Prior.COVID.infection. groups with twoSampleBar().
subsetData <- subset(mergedData, timeCategory == "One month post\n2nd dose")
twoSampleBar(
  data = subsetData, xData = "Prior.COVID.infection.",
  yData = "IC50_neutAb_log10", fillParam = "Prior.COVID.infection.",
  title = "One month post", yLabel = "Neutralizing antibodies",
  nonparam = TRUE  # TRUE rather than T: T is an ordinary, reassignable name
) +
  # Restrict the displayed y-range to 0 .. 1e5.
  coord_cartesian(ylim = c(0, 1e5)) +
  scale_y_continuous(
    trans = "pseudo_log", breaks = 10^(0:7),
    labels = trans_format("log10", math_format(10^.x)),
    minor_breaks = 5 * 10^(0:10)
  )
## Warning: Removed 18 rows containing missing values (position_quasirandom).

# ggsave(filename = "./Images/neutAb_oneMonthpost2ndDose.pdf", width=5)

# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "IC50_neutAb_log10")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs6H.csv")

# Anti-N IgG antibodies

# Per-participant anti-N IgG titer across time points.
# Rows labelled "two Weeks" are removed with a logical mask: negative which()
# indexing would return zero rows if no "two Weeks" entries were present,
# whereas this keeps every row in that case. Rows with a missing timeCategory
# are retained, as before.
subsetData <- mergedData[!(mergedData$timeCategory %in% "two Weeks"), ]
linePlot(
  data = subsetData, xData = "timeCategory", yData = "binding_IgG_N",
  groupby = "Record.ID", xLabel = " ", yLabel = "anti-N IgG titer",
  title = "anti-N IgG titer", colorby = "Prior.COVID.infection."
) +
  # Dashed reference line at a titer of 50 with an "LOD" label below it.
  geom_hline(yintercept = 50, linetype = "dashed", alpha = 0.3) +
  annotate("text", x = 4.1, y = 25, label = "LOD", color = "black", alpha = 0.2) +
  scale_y_continuous(
    trans = "pseudo_log", limits = c(0, 1e5), breaks = 10^(0:6),
    labels = trans_format("log10", math_format(10^.x)),
    minor_breaks = 5 * 10^(0:10)
  )

# ggsave(filename = "./Images/BindingAb_N_IgG_linePlot.pdf", width=7)

# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "binding_IgG_N")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs6D.csv")

# Baseline anti-N IgG antibodies

# Anti-N IgG titer at the "Baseline" time point, compared between the two
# Prior.COVID.infection. groups with twoSampleBar().
subsetData <- subset(mergedData, timeCategory == "Baseline")
twoSampleBar(
  data = subsetData, xData = "Prior.COVID.infection.",
  yData = "binding_IgG_N", fillParam = "Prior.COVID.infection.",
  title = "Baseline", yLabel = "Anti-N IgG titer",
  nonparam = TRUE  # TRUE rather than T: T is an ordinary, reassignable name
) +
  # Restrict the displayed y-range to 0 .. 1e5.
  coord_cartesian(ylim = c(0, 1e5)) +
  scale_y_continuous(
    trans = "pseudo_log", breaks = 10^(0:7),
    labels = trans_format("log10", math_format(10^.x)),
    minor_breaks = 5 * 10^(0:10)
  ) +
  # "LOD" label and dashed reference line, both at a titer of 50.
  annotate("text", x = 2.5, y = 50, label = "LOD", color = "black", alpha = 0.2) +
  geom_hline(yintercept = 50, linetype = "dashed", alpha = 0.3)
## Warning: Removed 4 rows containing missing values (position_quasirandom).

# ggsave(filename = "./Images/bindingAb_N_IgG_baseline.pdf", width=5)

# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "binding_IgG_N")]
# write.csv(x = dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/figs6C.csv")


# Baseline anti-S1 IgG titer versus FC_IgG_S1_postVax1 for baseline rows where
# Prior.COVID.infection. is "Yes", drawn with the project helper univScatter().
subsetData <- subset(
  mergedData,
  Prior.COVID.infection. == "Yes" & timeCategory == "Baseline"
)
univScatter(
  data = subsetData, xData = "binding_IgG_S1", yData = "FC_IgG_S1_postVax1",
  fillParam = "Prior.COVID.infection.", title = "Post 1st dose Anti-S1 IgG",
  xLabel = "Baseline anti-S1 IgG titer", yLabel = "Fold-change anti-S1 IgG",
  nonparam = TRUE  # TRUE rather than T: T is an ordinary, reassignable name
) +
  # Only one group is plotted, so a single fill colour is supplied; this
  # replaces the fill scale univScatter() already added (hence the message).
  scale_fill_manual(values = "#B5B2F1") +
  scale_x_continuous(
    trans = "pseudo_log", limits = c(1e2, 5e5), breaks = 10^(0:7),
    labels = trans_format("log10", math_format(10^.x)),
    minor_breaks = 5 * 10^(0:10)
  ) +
  scale_y_continuous(
    trans = "pseudo_log", limits = c(0, 1e5), breaks = 10^(0:7),
    labels = trans_format("log10", math_format(10^.x)),
    minor_breaks = 5 * 10^(0:10)
  )
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## `geom_smooth()` using formula 'y ~ x'

# ggsave(filename = "./Images/BindingAb_S1_IgG_vs_FC_S1binding.pdf")

# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","binding_IgG_S1", "FC_IgG_S1_postVax1")]
# write.csv(x = out, file = "plottedData/figs6E.csv")



# Baseline rows with a non-missing anti-S1 IgG titer, followed by Levene's test
# for equal variance between the Prior.COVID.infection. groups.
subsetData <- subset(
  mergedData,
  timeCategory == "Baseline" & !is.na(binding_IgG_S1)
)
levene_test(subsetData, binding_IgG_S1 ~ Prior.COVID.infection.)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## # A tibble: 1 x 4
##     df1   df2 statistic        p
##   <int> <int>     <dbl>    <dbl>
## 1     1    34      14.4 0.000585
# Shapiro-Wilk normality test on anti-S1 IgG titer in the current subsetData.
shapiro_test(subsetData, binding_IgG_S1)
## # A tibble: 1 x 3
##   variable       statistic             p
##   <chr>              <dbl>         <dbl>
## 1 binding_IgG_S1     0.546 0.00000000219
# Wilcoxon test of anti-S1 IgG titer between the Prior.COVID.infection. groups.
wilcox_test(subsetData, binding_IgG_S1 ~ Prior.COVID.infection.)
## # A tibble: 1 x 7
##   .y.            group1 group2    n1    n2 statistic            p
## * <chr>          <chr>  <chr>  <int> <int>     <dbl>        <dbl>
## 1 binding_IgG_S1 No     Yes       21    15         0 0.0000000314
# Descriptive statistics of anti-S1 IgG titer per Prior.COVID.infection. group.
get_summary_stats(group_by(subsetData, Prior.COVID.infection.), binding_IgG_S1)
## # A tibble: 2 x 14
##   Prior.COVID.inf~ variable     n   min    max median    q1     q3   iqr   mad
##   <chr>            <chr>    <dbl> <dbl>  <dbl>  <dbl> <dbl>  <dbl> <dbl> <dbl>
## 1 No               binding~    21   25  3.95e1    25    25     25     0     0 
## 2 Yes              binding~    15  323. 3.21e4  5991. 2350. 10901. 8551. 6271.
## # ... with 4 more variables: mean <dbl>, sd <dbl>, se <dbl>, ci <dbl>
# "Post 2nd dose" rows with a non-missing anti-S1 IgG titer, followed by
# Levene's test for equal variance between the Prior.COVID.infection. groups.
subsetData <- subset(
  mergedData,
  timeCategory == "Post 2nd dose" & !is.na(binding_IgG_S1)
)
levene_test(subsetData, binding_IgG_S1 ~ Prior.COVID.infection.)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## # A tibble: 1 x 4
##     df1   df2 statistic     p
##   <int> <int>     <dbl> <dbl>
## 1     1    30     0.395 0.535
# Shapiro-Wilk normality test on anti-S1 IgG titer in the current subsetData.
shapiro_test(subsetData, binding_IgG_S1)
## # A tibble: 1 x 3
##   variable       statistic            p
##   <chr>              <dbl>        <dbl>
## 1 binding_IgG_S1     0.602 0.0000000396
# Wilcoxon test of anti-S1 IgG titer between the Prior.COVID.infection. groups.
wilcox_test(subsetData, binding_IgG_S1 ~ Prior.COVID.infection.)
## # A tibble: 1 x 7
##   .y.            group1 group2    n1    n2 statistic      p
## * <chr>          <chr>  <chr>  <int> <int>     <dbl>  <dbl>
## 1 binding_IgG_S1 No     Yes       17    15        77 0.0583
# "One month post\n2nd dose" rows with a non-missing anti-S1 IgG titer, followed
# by descriptive statistics per Prior.COVID.infection. group.
subsetData <- subset(
  mergedData,
  timeCategory == "One month post\n2nd dose" & !is.na(binding_IgG_S1)
)
get_summary_stats(group_by(subsetData, Prior.COVID.infection.), binding_IgG_S1)
## # A tibble: 2 x 14
##   Prior.COVID.inf~ variable     n    min    max median     q1     q3    iqr
##   <chr>            <chr>    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
## 1 No               binding~    17 9.34e4 9.91e5 2.31e5 1.41e5 3.98e5 2.58e5
## 2 Yes              binding~    15 2.48e5 2.70e6 4.04e5 3.57e5 6.01e5 2.44e5
## # ... with 5 more variables: mad <dbl>, mean <dbl>, sd <dbl>, se <dbl>,
## #   ci <dbl>
# Levene's test for equal anti-S1 IgG variance between the
# Prior.COVID.infection. groups.
levene_test(subsetData, binding_IgG_S1 ~ Prior.COVID.infection.)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## # A tibble: 1 x 4
##     df1   df2 statistic     p
##   <int> <int>     <dbl> <dbl>
## 1     1    30      1.86 0.183
# Shapiro-Wilk normality test on anti-S1 IgG titer in the current subsetData.
shapiro_test(subsetData, binding_IgG_S1)
## # A tibble: 1 x 3
##   variable       statistic           p
##   <chr>              <dbl>       <dbl>
## 1 binding_IgG_S1     0.641 0.000000127
# Wilcoxon test of anti-S1 IgG titer between the Prior.COVID.infection. groups.
wilcox_test(subsetData, binding_IgG_S1 ~ Prior.COVID.infection.)
## # A tibble: 1 x 7
##   .y.            group1 group2    n1    n2 statistic       p
## * <chr>          <chr>  <chr>  <int> <int>     <dbl>   <dbl>
## 1 binding_IgG_S1 No     Yes       17    15        55 0.00535
# Descriptive statistics of FC_IgG_S1_postVax1 per Prior.COVID.infection. group.
# NOTE(review): subsetData still holds whichever time-point subset was assigned
# last - confirm that is the intended row set for this fold-change column.
get_summary_stats(group_by(subsetData, Prior.COVID.infection.), FC_IgG_S1_postVax1)
## # A tibble: 2 x 14
##   Prior.COVID.inf~ variable     n   min   max median    q1     q3    iqr    mad
##   <chr>            <chr>    <dbl> <dbl> <dbl>  <dbl> <dbl>  <dbl>  <dbl>  <dbl>
## 1 No               FC_IgG_~    15 0.894  79.1   2.66  1.37   8.40   7.03   2.44
## 2 Yes              FC_IgG_~    15 0.697 868.   92.4  30.7  162.   131.   115.  
## # ... with 4 more variables: mean <dbl>, sd <dbl>, se <dbl>, ci <dbl>
# Shapiro-Wilk normality test on FC_IgG_S1_postVax1 in the current subsetData.
shapiro_test(subsetData, FC_IgG_S1_postVax1)
## # A tibble: 1 x 3
##   variable           statistic             p
##   <chr>                  <dbl>         <dbl>
## 1 FC_IgG_S1_postVax1     0.516 0.00000000796
# Wilcoxon test of FC_IgG_S1_postVax1 between the Prior.COVID.infection. groups.
wilcox_test(subsetData, FC_IgG_S1_postVax1 ~ Prior.COVID.infection.)
## # A tibble: 1 x 7
##   .y.                group1 group2    n1    n2 statistic       p
## * <chr>              <chr>  <chr>  <int> <int>     <dbl>   <dbl>
## 1 FC_IgG_S1_postVax1 No     Yes       17    15        38 0.00141
# Descriptive statistics of FC_IgG_S1_postVax2 per Prior.COVID.infection. group.
get_summary_stats(group_by(subsetData, Prior.COVID.infection.), FC_IgG_S1_postVax2)
## # A tibble: 2 x 14
##   Prior.COVID.inf~ variable     n   min   max median    q1    q3    iqr   mad
##   <chr>            <chr>    <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl>  <dbl> <dbl>
## 1 No               FC_IgG_~    15 1.74   39.5  12.5  8.26   19.6 11.4    7.97
## 2 Yes              FC_IgG_~    14 0.611  28.3   1.29 0.955   1.6  0.645  0.52
## # ... with 4 more variables: mean <dbl>, sd <dbl>, se <dbl>, ci <dbl>
# Shapiro-Wilk normality test on FC_IgG_S1_postVax2 in the current subsetData.
shapiro_test(subsetData, FC_IgG_S1_postVax2)
## # A tibble: 1 x 3
##   variable           statistic         p
##   <chr>                  <dbl>     <dbl>
## 1 FC_IgG_S1_postVax2     0.772 0.0000278
# Wilcoxon test of FC_IgG_S1_postVax2 between the Prior.COVID.infection. groups.
wilcox_test(subsetData, FC_IgG_S1_postVax2 ~ Prior.COVID.infection.)
## # A tibble: 1 x 7
##   .y.                group1 group2    n1    n2 statistic         p
## * <chr>              <chr>  <chr>  <int> <int>     <dbl>     <dbl>
## 1 FC_IgG_S1_postVax2 No     Yes       17    15       195 0.0000176
# Baseline rows, followed by descriptive statistics of IC50_neutAb_log10 per
# Prior.COVID.infection. group.
# NOTE(review): rows are kept when binding_IgG_S1 is non-missing, although the
# variable analysed here is IC50_neutAb_log10 - confirm the filter column is
# intentional.
subsetData <- subset(
  mergedData,
  timeCategory == "Baseline" & !is.na(binding_IgG_S1)
)
get_summary_stats(group_by(subsetData, Prior.COVID.infection.), IC50_neutAb_log10)
## # A tibble: 2 x 14
##   Prior.COVID.inf~ variable     n   min   max median    q1    q3   iqr   mad
##   <chr>            <chr>    <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 No               IC50_ne~     5    10   10     10   10     10     0     0 
## 2 Yes              IC50_ne~    10    10  949.   128.  42.8  234.  191.  151.
## # ... with 4 more variables: mean <dbl>, sd <dbl>, se <dbl>, ci <dbl>
# Levene's test for equal IC50_neutAb_log10 variance between the
# Prior.COVID.infection. groups.
levene_test(subsetData, IC50_neutAb_log10 ~ Prior.COVID.infection.)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## # A tibble: 1 x 4
##     df1   df2 statistic     p
##   <int> <int>     <dbl> <dbl>
## 1     1    13      2.95 0.110
# Shapiro-Wilk normality test on IC50_neutAb_log10 in the current subsetData.
shapiro_test(subsetData, IC50_neutAb_log10)
## # A tibble: 1 x 3
##   variable          statistic         p
##   <chr>                 <dbl>     <dbl>
## 1 IC50_neutAb_log10     0.629 0.0000481
# Wilcoxon test of IC50_neutAb_log10 between the Prior.COVID.infection. groups.
wilcox_test(subsetData, IC50_neutAb_log10 ~ Prior.COVID.infection.)
## # A tibble: 1 x 7
##   .y.               group1 group2    n1    n2 statistic      p
## * <chr>             <chr>  <chr>  <int> <int>     <dbl>  <dbl>
## 1 IC50_neutAb_log10 No     Yes       21    15         5 0.0118
# "Post 1st dose" rows, followed by descriptive statistics of IC50_neutAb_log10
# per Prior.COVID.infection. group.
# NOTE(review): rows are kept when binding_IgG_S1 is non-missing, although the
# variable analysed here is IC50_neutAb_log10 - confirm the filter column is
# intentional.
subsetData <- subset(
  mergedData,
  timeCategory == "Post 1st dose" & !is.na(binding_IgG_S1)
)
get_summary_stats(group_by(subsetData, Prior.COVID.infection.), IC50_neutAb_log10)
## # A tibble: 2 x 14
##   Prior.COVID.inf~ variable     n   min    max median    q1    q3   iqr   mad
##   <chr>            <chr>    <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 No               IC50_ne~    10    10 1.59e1    10    10    10     0     0 
## 2 Yes              IC50_ne~    10    39 3.22e4  4084. 2220. 9115. 6894. 4600.
## # ... with 4 more variables: mean <dbl>, sd <dbl>, se <dbl>, ci <dbl>
# Levene's test for equal IC50_neutAb_log10 variance between the
# Prior.COVID.infection. groups.
levene_test(subsetData, IC50_neutAb_log10 ~ Prior.COVID.infection.)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## # A tibble: 1 x 4
##     df1   df2 statistic      p
##   <int> <int>     <dbl>  <dbl>
## 1     1    18      5.49 0.0308
# Shapiro-Wilk normality test on IC50_neutAb_log10 in the current subsetData.
shapiro_test(subsetData, IC50_neutAb_log10)
## # A tibble: 1 x 3
##   variable          statistic          p
##   <chr>                 <dbl>      <dbl>
## 1 IC50_neutAb_log10     0.581 0.00000191
# Wilcoxon test of IC50_neutAb_log10 between the Prior.COVID.infection. groups.
wilcox_test(subsetData, IC50_neutAb_log10 ~ Prior.COVID.infection.)
## # A tibble: 1 x 7
##   .y.               group1 group2    n1    n2 statistic         p
## * <chr>             <chr>  <chr>  <int> <int>     <dbl>     <dbl>
## 1 IC50_neutAb_log10 No     Yes       17    15         0 0.0000874
# "Post 2nd dose" rows, followed by descriptive statistics of IC50_neutAb_log10
# per Prior.COVID.infection. group.
# NOTE(review): rows are kept when binding_IgG_S1 is non-missing, although the
# variable analysed here is IC50_neutAb_log10 - confirm the filter column is
# intentional.
subsetData <- subset(
  mergedData,
  timeCategory == "Post 2nd dose" & !is.na(binding_IgG_S1)
)
get_summary_stats(group_by(subsetData, Prior.COVID.infection.), IC50_neutAb_log10)
## # A tibble: 2 x 14
##   Prior.COVID.inf~ variable     n   min   max median    q1    q3   iqr   mad
##   <chr>            <chr>    <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 No               IC50_ne~     9  637.  3496   1399  1173  1939   766  578.
## 2 Yes              IC50_ne~     9 1173  21199   3200  2809  6380  3571 3005.
## # ... with 4 more variables: mean <dbl>, sd <dbl>, se <dbl>, ci <dbl>
# Levene's test for equal IC50_neutAb_log10 variance between the
# Prior.COVID.infection. groups.
levene_test(subsetData, IC50_neutAb_log10 ~ Prior.COVID.infection.)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## # A tibble: 1 x 4
##     df1   df2 statistic      p
##   <int> <int>     <dbl>  <dbl>
## 1     1    16      3.08 0.0982
# Shapiro-Wilk normality test on IC50_neutAb_log10 in the current subsetData.
shapiro_test(subsetData, IC50_neutAb_log10)
## # A tibble: 1 x 3
##   variable          statistic         p
##   <chr>                 <dbl>     <dbl>
## 1 IC50_neutAb_log10     0.630 0.0000137
# Wilcoxon test of IC50_neutAb_log10 between the Prior.COVID.infection. groups.
wilcox_test(subsetData, IC50_neutAb_log10 ~ Prior.COVID.infection.)
## # A tibble: 1 x 7
##   .y.               group1 group2    n1    n2 statistic      p
## * <chr>             <chr>  <chr>  <int> <int>     <dbl>  <dbl>
## 1 IC50_neutAb_log10 No     Yes       17    15      13.5 0.0192
# Rows of mergedData whose FC_Elispot_IgG_S1 is a finite number (is.finite() is
# FALSE for NA, NaN and +/-Inf), followed by descriptive statistics per
# Prior.COVID.infection. group.
# NOTE(review): no time-point filter is applied here, so n counts rows of
# mergedData rather than participants if a value repeats across time points -
# confirm.
subsetData <- mergedData[which(is.finite(mergedData$FC_Elispot_IgG_S1)), ]
get_summary_stats(group_by(subsetData, Prior.COVID.infection.), FC_Elispot_IgG_S1)
## # A tibble: 2 x 14
##   Prior.COVID.inf~ variable     n   min   max median    q1     q3   iqr   mad
##   <chr>            <chr>    <dbl> <dbl> <dbl>  <dbl> <dbl>  <dbl> <dbl> <dbl>
## 1 No               FC_Elis~    10     2  12.0  7.01  2     12.0   10.0  7.42 
## 2 Yes              FC_Elis~    39     0   0.6  0.222 0.015  0.335  0.32 0.295
## # ... with 4 more variables: mean <dbl>, sd <dbl>, se <dbl>, ci <dbl>

# ------ Avidity analyses ------

# Per-participant anti-S1 IgG avidity across the five ordered time points; the
# resulting plot object is stored in `a`.
# Rows labelled "two Weeks" or "2 wks post 2nd dose" (and rows with a missing
# timeCategory) are excluded.
subsetData <- subset(
  mergedData,
  !(timeCategory == "two Weeks" | timeCategory == "2 wks post 2nd dose")
)
subsetData$timeCategory <- factor(
  subsetData$timeCategory,
  levels = c(
    "Baseline", "Post 1st dose", "Pre 2nd dose", "Post 2nd dose",
    "One month post\n2nd dose"
  )
)
a <- linePlot(
  data = subsetData, xData = "timeCategory", yData = "Avidity",
  groupby = "Record.ID", xLabel = " ", yLabel = "IgG avidity (%)",
  title = "anti-S1 Avidity", colorby = "Prior.COVID.infection."
) +
  theme(axis.title.x = element_blank()) +
  # Replaces the colour scale already set inside linePlot() (hence the message).
  scale_color_manual(name = "Prior COVID?", values = c("#FFC26A", "#B5B2F1")) +
  scale_y_continuous(limits = c(0, 110), breaks = seq(0, 140, 10))
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
# Render the avidity line plot stored in `a`.
print(a)

# ggsave(filename = "./Images/avidity_lineplot.pdf", width=8)
# plotly::ggplotly(a)

# Bartlett test of equal Avidity variance across time points, limited to rows
# where Prior.COVID.infection. is "No".
bartlett.test(
  Avidity ~ timeCategory,
  data = subset(subsetData, Prior.COVID.infection. == "No")
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Avidity by timeCategory
## Bartlett's K-squared = 0.12491, df = 2, p-value = 0.9395
# tukey_hsd( aov(Avidity ~ timeCategory, data=subset(subsetData, Prior.COVID.infection. == 'No')) )
# Dunn pairwise comparisons of Avidity between time points, limited to rows
# where Prior.COVID.infection. is "No".
subsetData %>%
  subset(Prior.COVID.infection. == "No") %>%
  dunn_test(Avidity ~ timeCategory)
## # A tibble: 3 x 9
##   .y.    group1   group2          n1    n2 statistic       p  p.adj p.adj.signif
## * <chr>  <chr>    <chr>        <int> <int>     <dbl>   <dbl>  <dbl> <chr>       
## 1 Avidi~ Pre 2nd~ "Post 2nd d~     7     2     1.09  0.278   0.556  ns          
## 2 Avidi~ Pre 2nd~ "One month ~     7     7     2.81  0.00500 0.0150 *           
## 3 Avidi~ Post 2n~ "One month ~     2     7     0.786 0.432   0.556  ns
# Bartlett test of equal Avidity variance across time points, limited to rows
# where Prior.COVID.infection. is "Yes".
bartlett.test(
  Avidity ~ timeCategory,
  data = subset(subsetData, Prior.COVID.infection. == "Yes")
)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Avidity by timeCategory
## Bartlett's K-squared = 17.508, df = 2, p-value = 0.0001578
# Dunn pairwise comparisons of Avidity between time points, limited to rows
# where Prior.COVID.infection. is "Yes".
subsetData %>%
  subset(Prior.COVID.infection. == "Yes") %>%
  dunn_test(Avidity ~ timeCategory)
## # A tibble: 3 x 9
##   .y.    group1    group2            n1    n2 statistic     p p.adj p.adj.signif
## * <chr>  <chr>     <chr>          <int> <int>     <dbl> <dbl> <dbl> <chr>       
## 1 Avidi~ Baseline  "Pre 2nd dose"     6     6    -0.216 0.829 0.829 ns          
## 2 Avidi~ Baseline  "One month po~     6     6    -1.57  0.117 0.351 ns          
## 3 Avidi~ Pre 2nd ~ "One month po~     6     6    -1.35  0.176 0.353 ns
# out <- subsetData[,c("Record.ID", "Prior.COVID.infection.","timeCategory", "Avidity")]
# write.csv(x =  dcast(data = out, formula = Record.ID + Prior.COVID.infection. ~ timeCategory), file = "plottedData/fig6E.csv")



# Per-participant anti-S1 IgG avidity across time points, drawn with linePlot()'s
# recentCOVID option switched on; both colour values are set to the same grey.
linePlot(
  data = subsetData, xData = "timeCategory", yData = "Avidity",
  groupby = "Record.ID", xLabel = " ", yLabel = "IgG avidity (%)",
  title = "anti-S1 Avidity", colorby = "Prior.COVID.infection.",
  recentCOVID = TRUE  # TRUE rather than T: T is an ordinary, reassignable name
) +
  theme(axis.title.x = element_blank()) +
  # Replaces the colour scale already set inside linePlot() (hence the message).
  scale_color_manual(name = "Prior COVID?", values = c("grey60", "grey60")) +
  scale_y_continuous(limits = c(0, 110), breaks = seq(0, 140, 10))
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

# ggsave(filename = "./Images/avidity_lineplot_recentCOVID.pdf")